https://github.com/vzakhari updated 
https://github.com/llvm/llvm-project/pull/134002

>From eada8e73210a92e6274b99ede1ab694e9c57c4de Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakha...@nvidia.com>
Date: Tue, 1 Apr 2025 15:55:30 -0700
Subject: [PATCH 1/3] [flang] Added driver options for arrays repacking.

Added options:
  * -f[no-]repack-arrays
  * -f[no-]stack-repack-arrays
  * -frepack-arrays-contiguity=whole/innermost
---
 clang/include/clang/Driver/Options.td         | 62 ++++++++++++++++++-
 clang/lib/Driver/ToolChains/Flang.cpp         | 30 ++++++---
 flang/docs/ArrayRepacking.md                  | 19 +++---
 flang/include/flang/Lower/LoweringOptions.def |  5 ++
 flang/lib/Frontend/CompilerInvocation.cpp     | 14 +++++
 flang/lib/Lower/ConvertVariable.cpp           |  2 +-
 .../test/Driver/frepack-arrays-contiguity.f90 | 27 ++++++++
 flang/test/Driver/frepack-arrays.f90          | 24 +++++++
 flang/test/Driver/fstack-repack-arrays.f90    | 24 +++++++
 flang/test/Lower/repack-arrays.f90            |  8 +--
 flang/tools/bbc/bbc.cpp                       | 12 +++-
 11 files changed, 199 insertions(+), 28 deletions(-)
 create mode 100644 flang/test/Driver/frepack-arrays-contiguity.f90
 create mode 100644 flang/test/Driver/frepack-arrays.f90
 create mode 100644 flang/test/Driver/fstack-repack-arrays.f90

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index e69b804de63b5..43bcdc6d1111f 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6825,7 +6825,6 @@ defm real_8_real_10 : BooleanFFlag<"real-8-real-10">, 
Group<gfortran_Group>;
 defm real_8_real_16 : BooleanFFlag<"real-8-real-16">, Group<gfortran_Group>;
 defm real_8_real_4 : BooleanFFlag<"real-8-real-4">, Group<gfortran_Group>;
 defm recursive : BooleanFFlag<"recursive">, Group<gfortran_Group>;
-defm repack_arrays : BooleanFFlag<"repack-arrays">, Group<gfortran_Group>;
 defm second_underscore : BooleanFFlag<"second-underscore">, 
Group<gfortran_Group>;
 defm sign_zero : BooleanFFlag<"sign-zero">, Group<gfortran_Group>;
 defm whole_file : BooleanFFlag<"whole-file">, Group<gfortran_Group>;
@@ -6967,6 +6966,51 @@ defm unsigned : OptInFC1FFlag<"unsigned", "Enables 
UNSIGNED type">;
 def fno_automatic : Flag<["-"], "fno-automatic">, Group<f_Group>,
   HelpText<"Implies the SAVE attribute for non-automatic local objects in 
subprograms unless RECURSIVE">;
 
+defm repack_arrays
+    : BoolOptionWithoutMarshalling<
+          "f", "repack-arrays", PosFlag<SetTrue, [], [], "Pack">,
+          NegFlag<SetFalse, [], [], "Do not pack">,
+          BothFlags<[], [],
+                    " non-contiguous assumed shape dummy arrays into "
+                    "contiguous memory">>,
+      DocBrief<[{Create temporary copies of non-contiguous assumed shape dummy
+arrays in subprogram prologues, and destroy them in subprotram epilogues.
+The temporary copy is initialized with values from the original array
+in the prologue, if needed. In the epilogue, the current values
+in the temporary array are copied into the original array, if needed.
+
+Accessing the contiguous temporary in the program code may result
+in faster execution compared to accessing elements of the original array,
+when they are sparse in memory. At the same time, the overhead
+of copying values between the original and the temporary arrays
+may be significant, which may slow down some programs.
+
+Enabling array repacking may also change the behavior of certain
+programs:
+* The copy actions may introduce a data race in valid OpenACC/OpenMP programs.
+  For example, if different threads execute the same subprogram
+  with a non-contiguous assumed shape dummy array, and the different threads
+  access unrelated parts of the array, then the whole array copy
+  made in each thread will cause a data race.
+* OpenACC/OpenMP offload programs may behave incorrectly with regards
+  to the device data environment, due to the fact that the original
+  array and the temporary may have different presence status on the device.
+* ``IS_CONTIGUOUS`` intrinsic may return ``TRUE`` with the array repacking
+  enabled, whereas it would return ``FALSE`` with the repacking disabled.
+* The result of ``LOC`` intrinsic applied to an actual argument associated
+  with a non-contiguous assumed shape dummy array, may be different
+  from the result of ``LOC`` applied to the dummy array.}]>;
+
+def frepack_arrays_contiguity_EQ
+    : Joined<["-"], "frepack-arrays-contiguity=">,
+      Group<f_Group>,
+      Values<"whole,innermost">,
+      HelpText<
+          "When -frepack-arrays is in effect, 'whole' enables "
+          "repacking for arrays that are non-contiguous in any dimension, "
+          "'innermost' enables repacking for arrays that are non-contiguous "
+          "in the innermost dimension (the default)">;
+
 defm save_main_program : BoolOptionWithoutMarshalling<"f", "save-main-program",
   PosFlag<SetTrue, [], [],
     "Place all main program variables in static memory (otherwise scalars may 
be placed on the stack)">,
@@ -6980,6 +7024,22 @@ defm loop_versioning : BoolOptionWithoutMarshalling<"f", 
"version-loops-for-stri
   PosFlag<SetTrue, [], [ClangOption], "Create unit-strided versions of loops">,
    NegFlag<SetFalse, [], [ClangOption], "Do not create unit-strided loops 
(default)">>;
 
+defm stack_repack_arrays
+    : BoolOptionWithoutMarshalling<
+          "f", "stack-repack-arrays",
+          PosFlag<SetTrue, [], [],
+                  "Attempt to allocate array temporaries created under "
+                  "-frepack-arrays on the stack">,
+          NegFlag<
+              SetFalse, [], [],
+              "Allocate -frepack-arrays temporaries on the heap (default)">>,
+      DocBrief<[{Controls whether the array temporaries created under
+**-frepack-arrays** are allocated on the stack or on the heap.
+
+By default, the heap is used. Allocations of the polymorphic types
+are always done on the heap, though this may change in future releases.
+  }]>;
+
 def fhermetic_module_files : Flag<["-"], "fhermetic-module-files">, 
Group<f_Group>,
   HelpText<"Emit hermetic module files (no nested USE association)">;
 
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 8312234e33a64..96e2486da764c 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -157,16 +157,26 @@ void Flang::addCodegenOptions(const ArgList &Args,
   if (shouldLoopVersion(Args))
     CmdArgs.push_back("-fversion-loops-for-stride");
 
-  Args.addAllArgs(CmdArgs,
-                  {options::OPT_fdo_concurrent_to_openmp_EQ,
-                   options::OPT_flang_experimental_hlfir,
-                   options::OPT_flang_deprecated_no_hlfir,
-                   options::OPT_fno_ppc_native_vec_elem_order,
-                   options::OPT_fppc_native_vec_elem_order,
-                   options::OPT_finit_global_zero,
-                   options::OPT_fno_init_global_zero, 
options::OPT_ftime_report,
-                   options::OPT_ftime_report_EQ, options::OPT_funroll_loops,
-                   options::OPT_fno_unroll_loops});
+  for (const auto &arg :
+       Args.getAllArgValues(options::OPT_frepack_arrays_contiguity_EQ))
+    if (arg.compare("whole") != 0 && arg.compare("innermost") != 0) {
+      
getToolChain().getDriver().Diag(diag::err_drv_unsupported_option_argument)
+          << "-frepack-arrays-contiguity=" << arg;
+    }
+
+  Args.addAllArgs(
+      CmdArgs,
+      {options::OPT_fdo_concurrent_to_openmp_EQ,
+       options::OPT_flang_experimental_hlfir,
+       options::OPT_flang_deprecated_no_hlfir,
+       options::OPT_fno_ppc_native_vec_elem_order,
+       options::OPT_fppc_native_vec_elem_order, options::OPT_finit_global_zero,
+       options::OPT_fno_init_global_zero, options::OPT_frepack_arrays,
+       options::OPT_fno_repack_arrays,
+       options::OPT_frepack_arrays_contiguity_EQ,
+       options::OPT_fstack_repack_arrays, options::OPT_fno_stack_repack_arrays,
+       options::OPT_ftime_report, options::OPT_ftime_report_EQ,
+       options::OPT_funroll_loops, options::OPT_fno_unroll_loops});
 }
 
 void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
diff --git a/flang/docs/ArrayRepacking.md b/flang/docs/ArrayRepacking.md
index 87cfc5d1bb4bc..7de599f293e40 100755
--- a/flang/docs/ArrayRepacking.md
+++ b/flang/docs/ArrayRepacking.md
@@ -39,13 +39,13 @@ Having these results it seems reasonable to provide support 
for arrays repacking
 
 #### Facts and guesses about the implementation
 
-The dynamic checks for continuity and the array copy code is located 
completely in the 
[runtime](https://github.com/gcc-mirror/gcc/blob/3e08a4ecea27c54fda90e8f58641b1986ad957e1/libgfortran/generated/in_pack_r8.c#L35),
 so the compiler inserts unconditional calls in the subprogram 
prologue/epilogue.
+The dynamic checks for contiguity and the array copy code is located 
completely in the 
[runtime](https://github.com/gcc-mirror/gcc/blob/3e08a4ecea27c54fda90e8f58641b1986ad957e1/libgfortran/generated/in_pack_r8.c#L35),
 so the compiler inserts unconditional calls in the subprogram 
prologue/epilogue.
 
 It looks like `gfortran` ignores `intent(out)/intent(in)` which could have 
helped to avoid some of the `pack/unpack` overhead.
 
 It looks like the `pack`/`unpack` actions are inserted early in the 
compilation pipeline, and these extra calls affect behavior of the later 
optimization passes. For example, `Polyhedron/fatigue2` slows down by about 2x 
with `-frepack-arrays`: this slowdown is not caused by the `pack`/`unpack` 
overhead, but is a consequence of worse function inlining decisions made after 
the calls insertion. The benchmarks becomes even faster than the original 
version with `-frepack-arrays` and proper `-finline-limit=` settings, but it 
does not look like the benchmark contains code that would benefit from the 
array repacking.
 
-It does not look like `gfortran` is able to eliminate the `pack`/`unpack` code 
after the function inlining, if the actual argument is statically known to be 
contiguous. So the overhead from the dynamic continuity checks is inevitable 
when `-frepack-arrays` is specified.
+It does not look like `gfortran` is able to eliminate the `pack`/`unpack` code 
after the function inlining, if the actual argument is statically known to be 
contiguous. So the overhead from the dynamic contiguity checks is inevitable 
when `-frepack-arrays` is specified.
 
 It does not look like `gfortran` tries to optimize the insertion of 
`pack`/`unpack` code. For example, if a dummy array is only used under a 
condition within the subprogram, the repacking code might be inserted under the 
same condition to minimize the overhead on the unconditional path through the 
subprogram.
 
@@ -59,7 +59,7 @@ It does not look like `gfortran` tries to optimize the 
insertion of `pack`/`unpa
 
 #### Facts and guesses about the implementation
 
-The `pack` code is only generated if the actual argument may be non-contiguous 
in the innermost dimension, as determined statically, i.e. the compiler does 
not generate any dynamic continuity checks. For example:
+The `pack` code is only generated if the actual argument may be non-contiguous 
in the innermost dimension, as determined statically, i.e. the compiler does 
not generate any dynamic contiguity checks. For example:
 
 ```Fortran
 interface
@@ -132,8 +132,8 @@ So it does not seem practical/reasonable to enable the 
array repacking by defaul
 ### Performance
 
 1. Minimize the overhead of array repacking, e.g. avoid copy-in/out whenever 
possible, execute copy-in/out only on the execution paths where the array is 
accessed.
-2. Provide different modes of repacking depending on the "continuity" meaning, 
i.e. one - array is contiguous in the innermost dimension, two - array is 
contiguous in all dimensions.
-3. Avoid generating repacking code, when the "continuity" can be statically 
proven (including after optimization passes like constant propagation, function 
inlining, etc.).
+2. Provide different modes of repacking depending on the "contiguity" meaning, 
i.e. one - array is contiguous in the innermost dimension, two - array is 
contiguous in all dimensions.
+3. Avoid generating repacking code, when the "contiguity" can be statically 
proven (including after optimization passes like constant propagation, function 
inlining, etc.).
 4. Use a set of heuristics to avoid generating repacking code based on the 
array usage pattern, e.g. if an array is proven not to be used in an array 
expression or a loop, etc.
 5. Use a set of heuristics to avoid repacking actions dynamically, e.g. based 
on the array size, element size, byte stride(s) of the [innermost] 
dimension(s), etc.
 6. Minimize the impact of the IR changes, introduced by repacking, on the 
later optimization passes.
@@ -156,7 +156,7 @@ Controlled by cli options, Lowering will generate a 
`fir.pack_array` operation i
 The new operations will hold all the information that customizes further 
handling of the `pack`/`unpack` actions, such as:
 
 * Optional array of attributes supporting an interface to generate a predicate 
that says if the repacking is safe in the current context.
-* The continuity mode: `innermost` vs `whole`.
+* The contiguity mode: `innermost` vs `whole`.
 * Attributes selecting the heuristics (both compiler and runtime ones) that 
may be applied to avoid `pack`/`unpack` actions.
 * Other attributes, like `stack` vs `heap` to manage the temporary allocation 
according to `-fstack-arrays`, etc.
 
@@ -195,7 +195,7 @@ The operation creates a new `!fir.box/class<!fir.array<>>` 
value to represent ei
 Arguments:
 
 * `stack` - indicates if `-fstack-arrays` is in effect for compiling this 
function.
-* `innermost` - tells that the repacking has to be done iff the array is not 
contiguous in the innermost dimension. This also describes what type of 
continuity can be expected from `%new_var`, i.e. `innermost` means that the 
resulting array is definitely contiguous in the innermost dimension, but may be 
non-contiguous in other dimensions (unless additional analysis proves 
otherwise). For 1-D arrays, `innermost` attribute is not valid.
+* `innermost` - tells that the repacking has to be done iff the array is not 
contiguous in the innermost dimension. This also describes what type of 
contiguity can be expected from `%new_var`, i.e. `innermost` means that the 
resulting array is definitely contiguous in the innermost dimension, but may be 
non-contiguous in other dimensions (unless additional analysis proves 
otherwise). For 1-D arrays, `innermost` attribute is not valid.
 * `no_copy` - indicates that, in case a temporary array is created, `%var` to 
`%new_var` copy is not required (`intent(out)` dummy argument case).
 * `heuristics`
   * `loop-only` - `fir.pack_array` can be optimized away, if the array is not 
used in a loop.
@@ -351,7 +351,7 @@ The `fir.pack_array`'s copy-in action cannot be skipped for 
`INTENT(OUT)` dummy
 
 #### Optional behavior
 
-In case of the `whole` continuity mode or with 1-D array, Flang can propagate 
this information to `hlfir.declare` - this may improve optimizations down the 
road. This can be done iff the repacking has no dynamic constraints and/or 
heuristics. For example:
+In case of the `whole` contiguity mode or with 1-D array, Flang can propagate 
this information to `hlfir.declare` - this may improve optimizations down the 
road. This can be done iff the repacking has no dynamic constraints and/or 
heuristics. For example:
 
 ```
     %c0 = arith.constant 0 : index
@@ -441,10 +441,11 @@ In cases where `fir.pack_array` is statically known to 
produce a copy that is co
 The following user options are proposed:
 
 * `-frepack-arrays` - the option forces Flang to repack a non-contiguous 
assumed-shape dummy array into a temporary contiguous memory, which may result 
in faster accesses of the array. The compiler will insert special code in 
subprogram prologue to allocate a temporary array and copy the original array 
into the temporary; in subprogram epilogue, it will insert a copy from the 
temporary array into the original array and deallocate the temporary. The 
overhead of the allocation/deallocation and the copies may be significant 
depending on the array size. The compiler will try to optimize the 
unnecessary/unprofitable repacking.
+* `-fstack-repack-arrays` - attempt allocating the temporary arrays in stack 
memory. By default, they are allocated in heap memory (note that 
`-fstack-arrays` does not affect the allocation of the temporaries created for 
the arrays repacking).
 * `-frepack-arrays-opts=[none|loop-only]` - the option enables optimizations 
that may eliminate the array repacking code depending on the array usage 
pattern:
   * `none` - no optimizations.
   * `loop-only` - the array repacking code will be removed in any subprogram 
where the array is not used inside a loop or an array expression.
-* `-frepack-arrays-continuity=[whole|innermost]`:
+* `-frepack-arrays-contiguity=[whole|innermost]`:
   * `whole` - the option will repack arrays that are non-contiguous in any 
dimension (default).
   * `innermost` - the option will repack arrays that are non-contiguous in the 
innermost dimension.
 * `-frepack-arrays-max-size=<int>` - arrays bigger than the specified size 
will not be repacked.
diff --git a/flang/include/flang/Lower/LoweringOptions.def 
b/flang/include/flang/Lower/LoweringOptions.def
index 6735bea551414..d98823a0e3341 100644
--- a/flang/include/flang/Lower/LoweringOptions.def
+++ b/flang/include/flang/Lower/LoweringOptions.def
@@ -56,6 +56,11 @@ ENUM_LOWERINGOPT(StackArrays, unsigned, 1, 0)
 /// packed into contiguous memory.
 ENUM_LOWERINGOPT(RepackArrays, unsigned, 1, 0)
 
+/// If true, the temporary arrays created under RepackArrays
+/// control will be allocated in stack memory. If false,
+/// they will be allocated in heap memory.
+ENUM_LOWERINGOPT(StackRepackArrays, unsigned, 1, 0)
+
 /// If true, the repacking (RepackArrays option above)
 /// will be done for arrays non-contiguous in any dimension,
 /// otherwise, it will be done only for arrays non-contiguous
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp 
b/flang/lib/Frontend/CompilerInvocation.cpp
index 466d939b7b840..20a4a0a6cf745 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1476,6 +1476,19 @@ bool CompilerInvocation::createFromArgs(
                     clang::driver::options::OPT_fno_realloc_lhs, true))
     invoc.loweringOpts.setReallocateLHS(false);
 
+  invoc.loweringOpts.setRepackArrays(
+      args.hasFlag(clang::driver::options::OPT_frepack_arrays,
+                   clang::driver::options::OPT_fno_repack_arrays,
+                   /*default=*/false));
+  invoc.loweringOpts.setStackRepackArrays(
+      args.hasFlag(clang::driver::options::OPT_fstack_repack_arrays,
+                   clang::driver::options::OPT_fno_stack_repack_arrays,
+                   /*default=*/false));
+  if (auto *arg = args.getLastArg(
+          clang::driver::options::OPT_frepack_arrays_contiguity_EQ))
+    invoc.loweringOpts.setRepackArraysWhole(arg->getValue() ==
+                                            llvm::StringRef{"whole"});
+
   success &= parseFrontendArgs(invoc.getFrontendOpts(), args, diags);
   parseTargetArgs(invoc.getTargetOpts(), args);
   parsePreprocessorArgs(invoc.getPreprocessorOpts(), args);
@@ -1715,6 +1728,7 @@ void CompilerInvocation::setLoweringOptions() {
   const Fortran::common::LangOptions &langOptions = getLangOpts();
   loweringOpts.setIntegerWrapAround(langOptions.getSignedOverflowBehavior() ==
                                     Fortran::common::LangOptions::SOB_Defined);
+  loweringOpts.setStackArrays(codegenOpts.StackArrays);
   Fortran::common::MathOptionsBase &mathOpts = loweringOpts.getMathOptions();
   // TODO: when LangOptions are finalized, we can represent
   //       the math related options using Fortran::commmon::MathOptionsBase,
diff --git a/flang/lib/Lower/ConvertVariable.cpp 
b/flang/lib/Lower/ConvertVariable.cpp
index 0b22b743edee9..366ff328bfa27 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -2630,7 +2630,7 @@ 
Fortran::lower::genPackArray(Fortran::lower::AbstractConverter &converter,
       });
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   const mlir::Location loc = genLocation(converter, sym);
-  bool stackAlloc = opts.getStackArrays();
+  bool stackAlloc = opts.getStackRepackArrays();
   // 1D arrays must always use 'whole' mode.
   bool isInnermostMode = !opts.getRepackArraysWhole() && sym.Rank() > 1;
   // Avoid copy-in for 'intent(out)' variable, unless this is a dummy
diff --git a/flang/test/Driver/frepack-arrays-contiguity.f90 
b/flang/test/Driver/frepack-arrays-contiguity.f90
new file mode 100644
index 0000000000000..d642cdac598af
--- /dev/null
+++ b/flang/test/Driver/frepack-arrays-contiguity.f90
@@ -0,0 +1,27 @@
+! Test forwarding just the forwarding of -frepack-arrays-contiguity options:
+! RUN: %flang -frepack-arrays-contiguity=whole %s -### -fsyntax-only 2>&1 | 
FileCheck --check-prefix=WHOLECMD %s
+! RUN: %flang -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 
| FileCheck --check-prefix=INNERMOSTCMD %s
+! RUN: %flang -frepack-arrays-contiguity=innermost 
-frepack-arrays-contiguity=whole %s -### -fsyntax-only 2>&1 | FileCheck 
--check-prefix=WHOLECMD %s
+! RUN: %flang -frepack-arrays-contiguity=whole 
-frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck 
--check-prefix=INNERMOSTCMD %s
+
+! Test proper setting of the lowering options:
+! RUN: %flang_fc1 -frepack-arrays -frepack-arrays-contiguity=whole %s 
-emit-hlfir -o - | FileCheck --check-prefix=WHOLE %s
+! RUN: %flang_fc1 -frepack-arrays-contiguity=whole %s -emit-hlfir -o - | 
FileCheck --check-prefix=NOREPACK %s
+! RUN: %flang_fc1 -frepack-arrays -frepack-arrays-contiguity=innermost %s 
-emit-hlfir -o - | FileCheck --check-prefix=INNERMOST %s
+! RUN: %flang_fc1 -frepack-arrays-contiguity=innermost %s -emit-hlfir -o - | 
FileCheck --check-prefix=NOREPACK %s
+
+! Default setting is 'innermost':
+! RUN: %flang_fc1 -frepack-arrays %s -emit-hlfir -o - | FileCheck 
--check-prefix=INNERMOST %s
+
+! WHOLECMD: "-fc1"{{.*}}"-frepack-arrays-contiguity=whole"
+! INNERMOSTCMD: "-fc1"{{.*}}"-frepack-arrays-contiguity=innermost"
+
+subroutine test(x)
+  real :: x(:,:)
+  ! WHOLE: fir.pack_array{{.*}}whole
+  ! WHOLE: fir.unpack_array
+  ! INNERMOST: fir.pack_array{{.*}}innermost
+  ! INNERMOST: fir.unpack_array
+  ! NOREPACK-NOT: fir.pack_array
+  ! NOREPACK-NOT: fir.unpack_array
+end subroutine
diff --git a/flang/test/Driver/frepack-arrays.f90 
b/flang/test/Driver/frepack-arrays.f90
new file mode 100644
index 0000000000000..ad96a14f74378
--- /dev/null
+++ b/flang/test/Driver/frepack-arrays.f90
@@ -0,0 +1,24 @@
+! Test forwarding just the forwarding of -f[no-]repack-arrays options:
+! RUN: %flang -frepack-arrays %s -### -fsyntax-only 2>&1 | FileCheck 
--check-prefix=REPACKCMD %s
+! RUN: %flang -fno-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck 
--check-prefix=NOREPACKCMD %s
+! RUN: %flang -frepack-arrays -fno-repack-arrays %s -### -fsyntax-only 2>&1 | 
FileCheck --check-prefix=NOREPACKCMD %s
+! RUN: %flang -fno-repack-arrays -frepack-arrays %s -### -fsyntax-only 2>&1 | 
FileCheck --check-prefix=REPACKCMD %s
+
+! Test proper setting of the lowering options:
+! RUN: %flang_fc1 -frepack-arrays %s -emit-hlfir -o - | FileCheck 
--check-prefix=REPACK %s
+! RUN: %flang_fc1 -fno-repack-arrays %s -emit-hlfir -o - | FileCheck 
--check-prefix=NOREPACK %s
+! RUN: %flang_fc1 -frepack-arrays -fno-repack-arrays %s -emit-hlfir -o - | 
FileCheck --check-prefix=NOREPACK %s
+! RUN: %flang_fc1 -fno-repack-arrays -frepack-arrays %s -emit-hlfir -o - | 
FileCheck --check-prefix=REPACK %s
+
+! REPACKCMD: "-fc1"{{.*}}"-frepack-arrays"
+! REPACKCMD-NOT: -fno-repack-arrays
+! NOREPACKCMD: "-fc1"{{.*}}"-fno-repack-arrays"
+! NOREPACKCMD-NOT: -frepack-arrays
+
+subroutine test(x)
+  real :: x(:)
+  ! REPACK: fir.pack_array
+  ! REPACK: fir.unpack_array
+  ! NOREPACK-NOT: fir.pack_array
+  ! NOREPACK-NOT: fir.unpack_array
+end subroutine
diff --git a/flang/test/Driver/fstack-repack-arrays.f90 
b/flang/test/Driver/fstack-repack-arrays.f90
new file mode 100644
index 0000000000000..4fb5aa0c18032
--- /dev/null
+++ b/flang/test/Driver/fstack-repack-arrays.f90
@@ -0,0 +1,24 @@
+! Test forwarding just the forwarding of -f[no-]stack-repack-arrays options:
+! RUN: %flang -fstack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck 
--check-prefix=STACKCMD %s
+! RUN: %flang -fno-stack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck 
--check-prefix=HEAPCMD %s
+! RUN: %flang -fstack-repack-arrays -fno-stack-repack-arrays %s -### 
-fsyntax-only 2>&1 | FileCheck --check-prefix=HEAPCMD %s
+! RUN: %flang -fno-stack-repack-arrays -fstack-arrays %s -### -fsyntax-only 
2>&1 | FileCheck --check-prefix=HEAPCMD %s
+! RUN: %flang -fno-stack-repack-arrays -fstack-repack-arrays %s -### 
-fsyntax-only 2>&1 | FileCheck --check-prefix=STACKCMD %s
+
+! Test proper setting of the lowering options:
+! RUN: %flang_fc1 -frepack-arrays -fstack-repack-arrays %s -emit-hlfir -o - | 
FileCheck --check-prefix=STACK %s
+! RUN: %flang_fc1 -frepack-arrays -fno-stack-repack-arrays %s -emit-hlfir -o - 
| FileCheck --check-prefix=HEAP %s
+
+! Default setting is 'heap':
+! RUN: %flang_fc1 -frepack-arrays %s -emit-hlfir -o - | FileCheck 
--check-prefix=HEAP %s
+
+! STACKCMD: "-fc1"{{.*}}"-fstack-repack-arrays"
+! HEAPCMD: "-fc1"{{.*}}"-fno-stack-repack-arrays"
+
+subroutine test(x)
+  real :: x(:,:)
+  ! STACK: fir.pack_array{{.*}}stack
+  ! STACK: fir.unpack_array{{.*}}stack
+  ! HEAP: fir.pack_array{{.*}}heap
+  ! HEAP: fir.unpack_array{{.*}}heap
+end subroutine
diff --git a/flang/test/Lower/repack-arrays.f90 
b/flang/test/Lower/repack-arrays.f90
index 19ea93a3521a3..ff89df82793a3 100644
--- a/flang/test/Lower/repack-arrays.f90
+++ b/flang/test/Lower/repack-arrays.f90
@@ -1,7 +1,7 @@
-! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays 
-frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck 
--check-prefixes=ALL,STACK,WHOLE %s
-! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays=false 
-frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck 
--check-prefixes=ALL,HEAP,WHOLE %s
-! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays 
-frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck 
--check-prefixes=ALL,STACK,INNER %s
-! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays=false 
-frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck 
--check-prefixes=ALL,HEAP,INNER %s
+! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays 
-frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck 
--check-prefixes=ALL,STACK,WHOLE %s
+! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays=false 
-frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck 
--check-prefixes=ALL,HEAP,WHOLE %s
+! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays 
-frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck 
--check-prefixes=ALL,STACK,INNER %s
+! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays=false 
-frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck 
--check-prefixes=ALL,HEAP,INNER %s
 
 ! ALL-LABEL:   func.func @_QPtest1(
 ! ALL-SAME:                        
%[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?xf32>> 
{fir.bindc_name = "x"}) {
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index 78ce510968ca5..434a9f3d1e986 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -251,15 +251,20 @@ static llvm::cl::opt<bool>
                                  "the LHS of the intrinsic assignment"),
                   llvm::cl::init(true));
 
-// TODO: -fstack-arrays is currently only used for fir.pack_array,
-// but it should probably be used for deciding how arrays/temporaries
-// are allocated during lowering.
+// TODO: -fstack-arrays is currently unused, but it should probably be used
+// for deciding how arrays/temporaries are allocated during lowering.
 static llvm::cl::opt<bool>
     stackArrays("fstack-arrays",
                 llvm::cl::desc("Allocate all arrays of unknown size and "
                                "temporary arrays in stack memory"),
                 llvm::cl::init(false));
 
+static llvm::cl::opt<bool> stackRepackArrays(
+    "fstack-repack-arrays",
+    llvm::cl::desc("Allocate temporary arrays for -frepack-arrays "
+                   "in stack memory"),
+    llvm::cl::init(false));
+
 static llvm::cl::opt<bool>
     repackArrays("frepack-arrays",
                  llvm::cl::desc("Pack non-contiguous assummed shape arrays "
@@ -430,6 +435,7 @@ static llvm::LogicalResult convertFortranSourceToMLIR(
   loweringOptions.setInitGlobalZero(initGlobalZero);
   loweringOptions.setReallocateLHS(reallocateLHS);
   loweringOptions.setStackArrays(stackArrays);
+  loweringOptions.setStackRepackArrays(stackRepackArrays);
   loweringOptions.setRepackArrays(repackArrays);
   loweringOptions.setRepackArraysWhole(repackArraysWhole);
   std::vector<Fortran::lower::EnvironmentDefault> envDefaults = {};

>From 4cce1a1456bf7e625f1a7818126b8c53700b8240 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakha...@nvidia.com>
Date: Tue, 1 Apr 2025 16:31:10 -0700
Subject: [PATCH 2/3] Fixed sphinx build.

---
 clang/include/clang/Driver/Options.td | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 43bcdc6d1111f..65166adbf06c2 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6987,6 +6987,7 @@ may be significant, which may slow down some programs.
 
 Enabling array repacking may also change the behavior of certain
 programs:
+
 * The copy actions may introduce a data race in valid OpenACC/OpenMP programs.
   For example, if different threads execute the same subprogram
   with a non-contiguous assumed shape dummy array, and the different threads

>From 45884f16ec55940e382cd240aa8b7529fd09ff7d Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakha...@nvidia.com>
Date: Wed, 2 Apr 2025 11:32:35 -0700
Subject: [PATCH 3/3] Addressed review comments.

---
 clang/include/clang/Driver/Options.td           | 4 ++--
 flang/include/flang/Lower/LoweringOptions.def   | 4 ----
 flang/lib/Frontend/CompilerInvocation.cpp       | 1 -
 flang/test/Driver/frepack-arrays-contiguity.f90 | 7 ++++++-
 flang/test/Driver/frepack-arrays.f90            | 2 +-
 flang/test/Driver/fstack-repack-arrays.f90      | 2 +-
 flang/tools/bbc/bbc.cpp                         | 9 ---------
 7 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 65166adbf06c2..2ca5f99e4ca63 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6974,7 +6974,7 @@ defm repack_arrays
                     " non-contiguous assumed shape dummy arrays into "
                     "contiguous memory">>,
       DocBrief<[{Create temporary copies of non-contiguous assumed shape dummy
-arrays in subprogram prologues, and destroy them in subprotram epilogues.
+arrays in subprogram prologues, and destroy them in subprogram epilogues.
 The temporary copy is initialized with values from the original array
 in the prologue, if needed. In the epilogue, the current values
 in the temporary array are copied into the original array, if needed.
@@ -7037,7 +7037,7 @@ defm stack_repack_arrays
       DocBrief<[{Controls whether the array temporaries created under
 **-frepack-arrays** are allocated on the stack or on the heap.
 
-By default, the heap is used. Allocations of the polymorphic types
+By default, the heap is used. Allocations of polymorphic types
 are always done on the heap, though this may change in future releases.
   }]>;
 
diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def
index d98823a0e3341..b062ea1a805ac 100644
--- a/flang/include/flang/Lower/LoweringOptions.def
+++ b/flang/include/flang/Lower/LoweringOptions.def
@@ -48,10 +48,6 @@ ENUM_LOWERINGOPT(ReallocateLHS, unsigned, 1, 1)
 /// On by default.
 ENUM_LOWERINGOPT(InitGlobalZero, unsigned, 1, 1)
 
-/// If true, the arrays of unknown size and array temporaries
-/// are requested to be allocated in stack memory.
-ENUM_LOWERINGOPT(StackArrays, unsigned, 1, 0)
-
 /// If true, the dummy assumed shape arrays are conditionally
 /// packed into contiguous memory.
 ENUM_LOWERINGOPT(RepackArrays, unsigned, 1, 0)
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 20a4a0a6cf745..6f87a18d69c3d 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1728,7 +1728,6 @@ void CompilerInvocation::setLoweringOptions() {
   const Fortran::common::LangOptions &langOptions = getLangOpts();
   loweringOpts.setIntegerWrapAround(langOptions.getSignedOverflowBehavior() ==
                                     Fortran::common::LangOptions::SOB_Defined);
-  loweringOpts.setStackArrays(codegenOpts.StackArrays);
   Fortran::common::MathOptionsBase &mathOpts = loweringOpts.getMathOptions();
   // TODO: when LangOptions are finalized, we can represent
   //       the math related options using Fortran::commmon::MathOptionsBase,
diff --git a/flang/test/Driver/frepack-arrays-contiguity.f90 b/flang/test/Driver/frepack-arrays-contiguity.f90
index d642cdac598af..88e5af4129eda 100644
--- a/flang/test/Driver/frepack-arrays-contiguity.f90
+++ b/flang/test/Driver/frepack-arrays-contiguity.f90
@@ -1,8 +1,11 @@
-! Test forwarding just the forwarding of -frepack-arrays-contiguity options:
+! Test forwarding of -frepack-arrays-contiguity options:
 ! RUN: %flang -frepack-arrays-contiguity=whole %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=WHOLECMD %s
 ! RUN: %flang -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=INNERMOSTCMD %s
 ! RUN: %flang -frepack-arrays-contiguity=innermost -frepack-arrays-contiguity=whole %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=WHOLECMD %s
 ! RUN: %flang -frepack-arrays-contiguity=whole -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=INNERMOSTCMD %s
+! RUN: not %flang -frepack-arrays-contiguity= -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=ERROR %s
+! RUN: not %flang -frepack-arrays-contiguity=whole3 -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=ERROR %s
+! RUN: not %flang -frepack-arrays-contiguity=innermostg -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=ERROR %s
 
 ! Test proper setting of the lowering options:
 ! RUN: %flang_fc1 -frepack-arrays -frepack-arrays-contiguity=whole %s -emit-hlfir -o - | FileCheck --check-prefix=WHOLE %s
@@ -13,6 +16,8 @@
 ! Default setting is 'innermost':
 ! RUN: %flang_fc1 -frepack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=INNERMOST %s
 
+! ERROR: error: unsupported argument '{{.*}}' to option '-frepack-arrays-contiguity='
+
 ! WHOLECMD: "-fc1"{{.*}}"-frepack-arrays-contiguity=whole"
 ! INNERMOSTCMD: "-fc1"{{.*}}"-frepack-arrays-contiguity=innermost"
 
diff --git a/flang/test/Driver/frepack-arrays.f90 b/flang/test/Driver/frepack-arrays.f90
index ad96a14f74378..0d1913d282446 100644
--- a/flang/test/Driver/frepack-arrays.f90
+++ b/flang/test/Driver/frepack-arrays.f90
@@ -1,4 +1,4 @@
-! Test forwarding just the forwarding of -f[no-]repack-arrays options:
+! Test forwarding of -f[no-]repack-arrays options:
 ! RUN: %flang -frepack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=REPACKCMD %s
 ! RUN: %flang -fno-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=NOREPACKCMD %s
 ! RUN: %flang -frepack-arrays -fno-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=NOREPACKCMD %s
diff --git a/flang/test/Driver/fstack-repack-arrays.f90 b/flang/test/Driver/fstack-repack-arrays.f90
index 4fb5aa0c18032..406228cfe6105 100644
--- a/flang/test/Driver/fstack-repack-arrays.f90
+++ b/flang/test/Driver/fstack-repack-arrays.f90
@@ -1,4 +1,4 @@
-! Test forwarding just the forwarding of -f[no-]stack-repack-arrays options:
+! Test forwarding of -f[no-]stack-repack-arrays options:
 ! RUN: %flang -fstack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=STACKCMD %s
 ! RUN: %flang -fno-stack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=HEAPCMD %s
 ! RUN: %flang -fstack-repack-arrays -fno-stack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=HEAPCMD %s
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index 434a9f3d1e986..c544008a24d56 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -251,14 +251,6 @@ static llvm::cl::opt<bool>
                                  "the LHS of the intrinsic assignment"),
                   llvm::cl::init(true));
 
-// TODO: -fstack-arrays is currently unused, but it should probably be used
-// for deciding how arrays/temporaries are allocated during lowering.
-static llvm::cl::opt<bool>
-    stackArrays("fstack-arrays",
-                llvm::cl::desc("Allocate all arrays of unknown size and "
-                               "temporary arrays in stack memory"),
-                llvm::cl::init(false));
-
 static llvm::cl::opt<bool> stackRepackArrays(
     "fstack-repack-arrays",
     llvm::cl::desc("Allocate temporary arrays for -frepack-arrays "
@@ -434,7 +426,6 @@ static llvm::LogicalResult convertFortranSourceToMLIR(
   loweringOptions.setIntegerWrapAround(integerWrapAround);
   loweringOptions.setInitGlobalZero(initGlobalZero);
   loweringOptions.setReallocateLHS(reallocateLHS);
-  loweringOptions.setStackArrays(stackArrays);
   loweringOptions.setStackRepackArrays(stackRepackArrays);
   loweringOptions.setRepackArrays(repackArrays);
   loweringOptions.setRepackArraysWhole(repackArraysWhole);

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to