https://github.com/vzakhari updated 
https://github.com/llvm/llvm-project/pull/134002

>From b936044f9a77cb717d74248cad5021b5d997d407 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakha...@nvidia.com>
Date: Tue, 1 Apr 2025 15:55:30 -0700
Subject: [PATCH 1/2] [flang] Added driver options for arrays repacking.

Added options:
  * -f[no-]repack-arrays
  * -f[no-]stack-repack-arrays
  * -frepack-arrays-contiguity=whole/innermost
---
 clang/include/clang/Driver/Options.td         | 62 ++++++++++++++++++-
 clang/lib/Driver/ToolChains/Flang.cpp         | 28 ++++++---
 flang/docs/ArrayRepacking.md                  | 19 +++---
 flang/include/flang/Lower/LoweringOptions.def |  5 ++
 flang/lib/Frontend/CompilerInvocation.cpp     | 14 +++++
 flang/lib/Lower/ConvertVariable.cpp           |  2 +-
 .../test/Driver/frepack-arrays-contiguity.f90 | 27 ++++++++
 flang/test/Driver/frepack-arrays.f90          | 24 +++++++
 flang/test/Driver/fstack-repack-arrays.f90    | 24 +++++++
 flang/test/Lower/repack-arrays.f90            |  8 +--
 flang/tools/bbc/bbc.cpp                       | 12 +++-
 11 files changed, 198 insertions(+), 27 deletions(-)
 create mode 100644 flang/test/Driver/frepack-arrays-contiguity.f90
 create mode 100644 flang/test/Driver/frepack-arrays.f90
 create mode 100644 flang/test/Driver/fstack-repack-arrays.f90

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 89cb03cc33b98..49a59c0417455 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6819,7 +6819,6 @@ defm real_8_real_10 : BooleanFFlag<"real-8-real-10">, 
Group<gfortran_Group>;
 defm real_8_real_16 : BooleanFFlag<"real-8-real-16">, Group<gfortran_Group>;
 defm real_8_real_4 : BooleanFFlag<"real-8-real-4">, Group<gfortran_Group>;
 defm recursive : BooleanFFlag<"recursive">, Group<gfortran_Group>;
-defm repack_arrays : BooleanFFlag<"repack-arrays">, Group<gfortran_Group>;
 defm second_underscore : BooleanFFlag<"second-underscore">, 
Group<gfortran_Group>;
 defm sign_zero : BooleanFFlag<"sign-zero">, Group<gfortran_Group>;
 defm whole_file : BooleanFFlag<"whole-file">, Group<gfortran_Group>;
@@ -6961,6 +6960,51 @@ defm unsigned : OptInFC1FFlag<"unsigned", "Enables 
UNSIGNED type">;
 def fno_automatic : Flag<["-"], "fno-automatic">, Group<f_Group>,
   HelpText<"Implies the SAVE attribute for non-automatic local objects in 
subprograms unless RECURSIVE">;
 
+defm repack_arrays
+    : BoolOptionWithoutMarshalling<
+          "f", "repack-arrays", PosFlag<SetTrue, [], [], "Pack">,
+          NegFlag<SetFalse, [], [], "Do not pack">,
+          BothFlags<[], [],
+                    " non-contiguous assumed shape dummy arrays into "
+                    "contiguous memory">>,
+      DocBrief<[{Create temporary copies of non-contiguous assumed shape dummy
+arrays in subprogram prologues, and destroy them in subprogram epilogues.
+The temporary copy is initialized with values from the original array
+in the prologue, if needed. In the epilogue, the current values
+in the temporary array are copied into the original array, if needed.
+
+Accessing the contiguous temporary in the program code may result
+in faster execution compared to accessing elements of the original array,
+when they are sparse in memory. At the same time, the overhead
+of copying values between the original and the temporary arrays
+may be significant, which may slow down some programs.
+
+Enabling array repacking may also change the behavior of certain
+programs:
+* The copy actions may introduce a data race in valid OpenACC/OpenMP programs.
+  For example, if different threads execute the same subprogram
+  with a non-contiguous assumed shape dummy array, and the different threads
+  access unrelated parts of the array, then the whole array copy
+  made in each thread will cause a data race.
+* OpenACC/OpenMP offload programs may behave incorrectly with regards
+  to the device data environment, due to the fact that the original
+  array and the temporary may have different presence status on the device.
+* ``IS_CONTIGUOUS`` intrinsic may return ``TRUE`` with the array repacking
+  enabled, whereas it would return ``FALSE`` with the repacking disabled.
+* The result of ``LOC`` intrinsic applied to an actual argument associated
+  with a non-contiguous assumed shape dummy array, may be different
+  from the result of ``LOC`` applied to the dummy array.}]>;
+
+def frepack_arrays_contiguity_EQ
+    : Joined<["-"], "frepack-arrays-contiguity=">,
+      Group<f_Group>,
+      Values<"whole,innermost">,
+      HelpText<
+          "When -frepack-arrays is in effect, 'whole' enables "
+          "repacking for arrays that are non-contiguous in any dimension, "
+          "'innermost' enables repacking for arrays that are non-contiguous "
+          "in the innermost dimension (the default)">;
+
 defm save_main_program : BoolOptionWithoutMarshalling<"f", "save-main-program",
   PosFlag<SetTrue, [], [],
     "Place all main program variables in static memory (otherwise scalars may 
be placed on the stack)">,
@@ -6974,6 +7018,22 @@ defm loop_versioning : BoolOptionWithoutMarshalling<"f", 
"version-loops-for-stri
   PosFlag<SetTrue, [], [ClangOption], "Create unit-strided versions of loops">,
    NegFlag<SetFalse, [], [ClangOption], "Do not create unit-strided loops 
(default)">>;
 
+defm stack_repack_arrays
+    : BoolOptionWithoutMarshalling<
+          "f", "stack-repack-arrays",
+          PosFlag<SetTrue, [], [],
+                  "Attempt to allocate array temporaries created under "
+                  "-frepack-arrays on the stack">,
+          NegFlag<
+              SetFalse, [], [],
+              "Allocate -frepack-arrays temporaries on the heap (default)">>,
+      DocBrief<[{Controls whether the array temporaries created under
+**-frepack-arrays** are allocated on the stack or on the heap.
+
+By default, the heap is used. Allocations of the polymorphic types
+are always done on the heap, though this may change in future releases.
+  }]>;
+
 def fhermetic_module_files : Flag<["-"], "fhermetic-module-files">, 
Group<f_Group>,
   HelpText<"Emit hermetic module files (no nested USE association)">;
 } // let Visibility = [FC1Option, FlangOption]
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index a44513a83a2d7..ef451eb618a05 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -157,15 +157,25 @@ void Flang::addCodegenOptions(const ArgList &Args,
   if (shouldLoopVersion(Args))
     CmdArgs.push_back("-fversion-loops-for-stride");
 
-  Args.addAllArgs(CmdArgs,
-                  {options::OPT_flang_experimental_hlfir,
-                   options::OPT_flang_deprecated_no_hlfir,
-                   options::OPT_fno_ppc_native_vec_elem_order,
-                   options::OPT_fppc_native_vec_elem_order,
-                   options::OPT_finit_global_zero,
-                   options::OPT_fno_init_global_zero, 
options::OPT_ftime_report,
-                   options::OPT_ftime_report_EQ, options::OPT_funroll_loops,
-                   options::OPT_fno_unroll_loops});
+  for (const auto &arg :
+       Args.getAllArgValues(options::OPT_frepack_arrays_contiguity_EQ))
+    if (arg.compare("whole") != 0 && arg.compare("innermost") != 0) {
+      
getToolChain().getDriver().Diag(diag::err_drv_unsupported_option_argument)
+          << "-frepack-arrays-contiguity=" << arg;
+    }
+
+  Args.addAllArgs(
+      CmdArgs,
+      {options::OPT_flang_experimental_hlfir,
+       options::OPT_flang_deprecated_no_hlfir,
+       options::OPT_fno_ppc_native_vec_elem_order,
+       options::OPT_fppc_native_vec_elem_order, options::OPT_finit_global_zero,
+       options::OPT_fno_init_global_zero, options::OPT_frepack_arrays,
+       options::OPT_fno_repack_arrays,
+       options::OPT_frepack_arrays_contiguity_EQ,
+       options::OPT_fstack_repack_arrays, options::OPT_fno_stack_repack_arrays,
+       options::OPT_ftime_report, options::OPT_ftime_report_EQ,
+       options::OPT_funroll_loops, options::OPT_fno_unroll_loops});
 }
 
 void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
diff --git a/flang/docs/ArrayRepacking.md b/flang/docs/ArrayRepacking.md
index 87cfc5d1bb4bc..7de599f293e40 100755
--- a/flang/docs/ArrayRepacking.md
+++ b/flang/docs/ArrayRepacking.md
@@ -39,13 +39,13 @@ Having these results it seems reasonable to provide support 
for arrays repacking
 
 #### Facts and guesses about the implementation
 
-The dynamic checks for continuity and the array copy code is located 
completely in the 
[runtime](https://github.com/gcc-mirror/gcc/blob/3e08a4ecea27c54fda90e8f58641b1986ad957e1/libgfortran/generated/in_pack_r8.c#L35),
 so the compiler inserts unconditional calls in the subprogram 
prologue/epilogue.
+The dynamic checks for contiguity and the array copy code is located 
completely in the 
[runtime](https://github.com/gcc-mirror/gcc/blob/3e08a4ecea27c54fda90e8f58641b1986ad957e1/libgfortran/generated/in_pack_r8.c#L35),
 so the compiler inserts unconditional calls in the subprogram 
prologue/epilogue.
 
 It looks like `gfortran` ignores `intent(out)/intent(in)` which could have 
helped to avoid some of the `pack/unpack` overhead.
 
 It looks like the `pack`/`unpack` actions are inserted early in the 
compilation pipeline, and these extra calls affect behavior of the later 
optimization passes. For example, `Polyhedron/fatigue2` slows down by about 2x 
with `-frepack-arrays`: this slowdown is not caused by the `pack`/`unpack` 
overhead, but is a consequence of worse function inlining decisions made after 
the calls insertion. The benchmarks becomes even faster than the original 
version with `-frepack-arrays` and proper `-finline-limit=` settings, but it 
does not look like the benchmark contains code that would benefit from the 
array repacking.
 
-It does not look like `gfortran` is able to eliminate the `pack`/`unpack` code 
after the function inlining, if the actual argument is statically known to be 
contiguous. So the overhead from the dynamic continuity checks is inevitable 
when `-frepack-arrays` is specified.
+It does not look like `gfortran` is able to eliminate the `pack`/`unpack` code 
after the function inlining, if the actual argument is statically known to be 
contiguous. So the overhead from the dynamic contiguity checks is inevitable 
when `-frepack-arrays` is specified.
 
 It does not look like `gfortran` tries to optimize the insertion of 
`pack`/`unpack` code. For example, if a dummy array is only used under a 
condition within the subprogram, the repacking code might be inserted under the 
same condition to minimize the overhead on the unconditional path through the 
subprogram.
 
@@ -59,7 +59,7 @@ It does not look like `gfortran` tries to optimize the 
insertion of `pack`/`unpa
 
 #### Facts and guesses about the implementation
 
-The `pack` code is only generated if the actual argument may be non-contiguous 
in the innermost dimension, as determined statically, i.e. the compiler does 
not generate any dynamic continuity checks. For example:
+The `pack` code is only generated if the actual argument may be non-contiguous 
in the innermost dimension, as determined statically, i.e. the compiler does 
not generate any dynamic contiguity checks. For example:
 
 ```Fortran
 interface
@@ -132,8 +132,8 @@ So it does not seem practical/reasonable to enable the 
array repacking by defaul
 ### Performance
 
 1. Minimize the overhead of array repacking, e.g. avoid copy-in/out whenever 
possible, execute copy-in/out only on the execution paths where the array is 
accessed.
-2. Provide different modes of repacking depending on the "continuity" meaning, 
i.e. one - array is contiguous in the innermost dimension, two - array is 
contiguous in all dimensions.
-3. Avoid generating repacking code, when the "continuity" can be statically 
proven (including after optimization passes like constant propagation, function 
inlining, etc.).
+2. Provide different modes of repacking depending on the "contiguity" meaning, 
i.e. one - array is contiguous in the innermost dimension, two - array is 
contiguous in all dimensions.
+3. Avoid generating repacking code, when the "contiguity" can be statically 
proven (including after optimization passes like constant propagation, function 
inlining, etc.).
 4. Use a set of heuristics to avoid generating repacking code based on the 
array usage pattern, e.g. if an array is proven not to be used in an array 
expression or a loop, etc.
 5. Use a set of heuristics to avoid repacking actions dynamically, e.g. based 
on the array size, element size, byte stride(s) of the [innermost] 
dimension(s), etc.
 6. Minimize the impact of the IR changes, introduced by repacking, on the 
later optimization passes.
@@ -156,7 +156,7 @@ Controlled by cli options, Lowering will generate a 
`fir.pack_array` operation i
 The new operations will hold all the information that customizes further 
handling of the `pack`/`unpack` actions, such as:
 
 * Optional array of attributes supporting an interface to generate a predicate 
that says if the repacking is safe in the current context.
-* The continuity mode: `innermost` vs `whole`.
+* The contiguity mode: `innermost` vs `whole`.
 * Attributes selecting the heuristics (both compiler and runtime ones) that 
may be applied to avoid `pack`/`unpack` actions.
 * Other attributes, like `stack` vs `heap` to manage the temporary allocation 
according to `-fstack-arrays`, etc.
 
@@ -195,7 +195,7 @@ The operation creates a new `!fir.box/class<!fir.array<>>` 
value to represent ei
 Arguments:
 
 * `stack` - indicates if `-fstack-arrays` is in effect for compiling this 
function.
-* `innermost` - tells that the repacking has to be done iff the array is not 
contiguous in the innermost dimension. This also describes what type of 
continuity can be expected from `%new_var`, i.e. `innermost` means that the 
resulting array is definitely contiguous in the innermost dimension, but may be 
non-contiguous in other dimensions (unless additional analysis proves 
otherwise). For 1-D arrays, `innermost` attribute is not valid.
+* `innermost` - tells that the repacking has to be done iff the array is not 
contiguous in the innermost dimension. This also describes what type of 
contiguity can be expected from `%new_var`, i.e. `innermost` means that the 
resulting array is definitely contiguous in the innermost dimension, but may be 
non-contiguous in other dimensions (unless additional analysis proves 
otherwise). For 1-D arrays, `innermost` attribute is not valid.
 * `no_copy` - indicates that, in case a temporary array is created, `%var` to 
`%new_var` copy is not required (`intent(out)` dummy argument case).
 * `heuristics`
   * `loop-only` - `fir.pack_array` can be optimized away, if the array is not 
used in a loop.
@@ -351,7 +351,7 @@ The `fir.pack_array`'s copy-in action cannot be skipped for 
`INTENT(OUT)` dummy
 
 #### Optional behavior
 
-In case of the `whole` continuity mode or with 1-D array, Flang can propagate 
this information to `hlfir.declare` - this may improve optimizations down the 
road. This can be done iff the repacking has no dynamic constraints and/or 
heuristics. For example:
+In case of the `whole` contiguity mode or with 1-D array, Flang can propagate 
this information to `hlfir.declare` - this may improve optimizations down the 
road. This can be done iff the repacking has no dynamic constraints and/or 
heuristics. For example:
 
 ```
     %c0 = arith.constant 0 : index
@@ -441,10 +441,11 @@ In cases where `fir.pack_array` is statically known to 
produce a copy that is co
 The following user options are proposed:
 
 * `-frepack-arrays` - the option forces Flang to repack a non-contiguous 
assumed-shape dummy array into a temporary contiguous memory, which may result 
in faster accesses of the array. The compiler will insert special code in 
subprogram prologue to allocate a temporary array and copy the original array 
into the temporary; in subprogram epilogue, it will insert a copy from the 
temporary array into the original array and deallocate the temporary. The 
overhead of the allocation/deallocation and the copies may be significant 
depending on the array size. The compiler will try to optimize the 
unnecessary/unprofitable repacking.
+* `-fstack-repack-arrays` - attempt allocating the temporary arrays in stack 
memory. By default, they are allocated in heap memory (note that 
`-fstack-arrays` does not affect the allocation of the temporaries created for 
the arrays repacking).
 * `-frepack-arrays-opts=[none|loop-only]` - the option enables optimizations 
that may eliminate the array repacking code depending on the array usage 
pattern:
   * `none` - no optimizations.
   * `loop-only` - the array repacking code will be removed in any subprogram 
where the array is not used inside a loop or an array expression.
-* `-frepack-arrays-continuity=[whole|innermost]`:
+* `-frepack-arrays-contiguity=[whole|innermost]`:
   * `whole` - the option will repack arrays that are non-contiguous in any 
dimension (default).
   * `innermost` - the option will repack arrays that are non-contiguous in the 
innermost dimension.
 * `-frepack-arrays-max-size=<int>` - arrays bigger than the specified size 
will not be repacked.
diff --git a/flang/include/flang/Lower/LoweringOptions.def 
b/flang/include/flang/Lower/LoweringOptions.def
index 6735bea551414..d98823a0e3341 100644
--- a/flang/include/flang/Lower/LoweringOptions.def
+++ b/flang/include/flang/Lower/LoweringOptions.def
@@ -56,6 +56,11 @@ ENUM_LOWERINGOPT(StackArrays, unsigned, 1, 0)
 /// packed into contiguous memory.
 ENUM_LOWERINGOPT(RepackArrays, unsigned, 1, 0)
 
+/// If true, the temporary arrays created under RepackArrays
+/// control will be allocated in stack memory. If false,
+/// they will be allocated in heap memory.
+ENUM_LOWERINGOPT(StackRepackArrays, unsigned, 1, 0)
+
 /// If true, the repacking (RepackArrays option above)
 /// will be done for arrays non-contiguous in any dimension,
 /// otherwise, it will be done only for arrays non-contiguous
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp 
b/flang/lib/Frontend/CompilerInvocation.cpp
index 229695b18d278..4c3b63f07b190 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1448,6 +1448,19 @@ bool CompilerInvocation::createFromArgs(
                     clang::driver::options::OPT_fno_realloc_lhs, true))
     invoc.loweringOpts.setReallocateLHS(false);
 
+  invoc.loweringOpts.setRepackArrays(
+      args.hasFlag(clang::driver::options::OPT_frepack_arrays,
+                   clang::driver::options::OPT_fno_repack_arrays,
+                   /*default=*/false));
+  invoc.loweringOpts.setStackRepackArrays(
+      args.hasFlag(clang::driver::options::OPT_fstack_repack_arrays,
+                   clang::driver::options::OPT_fno_stack_repack_arrays,
+                   /*default=*/false));
+  if (auto *arg = args.getLastArg(
+          clang::driver::options::OPT_frepack_arrays_contiguity_EQ))
+    invoc.loweringOpts.setRepackArraysWhole(arg->getValue() ==
+                                            llvm::StringRef{"whole"});
+
   success &= parseFrontendArgs(invoc.getFrontendOpts(), args, diags);
   parseTargetArgs(invoc.getTargetOpts(), args);
   parsePreprocessorArgs(invoc.getPreprocessorOpts(), args);
@@ -1687,6 +1700,7 @@ void CompilerInvocation::setLoweringOptions() {
   const Fortran::common::LangOptions &langOptions = getLangOpts();
   loweringOpts.setIntegerWrapAround(langOptions.getSignedOverflowBehavior() ==
                                     Fortran::common::LangOptions::SOB_Defined);
+  loweringOpts.setStackArrays(codegenOpts.StackArrays);
   Fortran::common::MathOptionsBase &mathOpts = loweringOpts.getMathOptions();
   // TODO: when LangOptions are finalized, we can represent
   //       the math related options using Fortran::commmon::MathOptionsBase,
diff --git a/flang/lib/Lower/ConvertVariable.cpp 
b/flang/lib/Lower/ConvertVariable.cpp
index 0b22b743edee9..366ff328bfa27 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -2630,7 +2630,7 @@ 
Fortran::lower::genPackArray(Fortran::lower::AbstractConverter &converter,
       });
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   const mlir::Location loc = genLocation(converter, sym);
-  bool stackAlloc = opts.getStackArrays();
+  bool stackAlloc = opts.getStackRepackArrays();
   // 1D arrays must always use 'whole' mode.
   bool isInnermostMode = !opts.getRepackArraysWhole() && sym.Rank() > 1;
   // Avoid copy-in for 'intent(out)' variable, unless this is a dummy
diff --git a/flang/test/Driver/frepack-arrays-contiguity.f90 
b/flang/test/Driver/frepack-arrays-contiguity.f90
new file mode 100644
index 0000000000000..d642cdac598af
--- /dev/null
+++ b/flang/test/Driver/frepack-arrays-contiguity.f90
@@ -0,0 +1,27 @@
+! Test just the forwarding of -frepack-arrays-contiguity options:
+! RUN: %flang -frepack-arrays-contiguity=whole %s -### -fsyntax-only 2>&1 | 
FileCheck --check-prefix=WHOLECMD %s
+! RUN: %flang -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 
| FileCheck --check-prefix=INNERMOSTCMD %s
+! RUN: %flang -frepack-arrays-contiguity=innermost 
-frepack-arrays-contiguity=whole %s -### -fsyntax-only 2>&1 | FileCheck 
--check-prefix=WHOLECMD %s
+! RUN: %flang -frepack-arrays-contiguity=whole 
-frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck 
--check-prefix=INNERMOSTCMD %s
+
+! Test proper setting of the lowering options:
+! RUN: %flang_fc1 -frepack-arrays -frepack-arrays-contiguity=whole %s 
-emit-hlfir -o - | FileCheck --check-prefix=WHOLE %s
+! RUN: %flang_fc1 -frepack-arrays-contiguity=whole %s -emit-hlfir -o - | 
FileCheck --check-prefix=NOREPACK %s
+! RUN: %flang_fc1 -frepack-arrays -frepack-arrays-contiguity=innermost %s 
-emit-hlfir -o - | FileCheck --check-prefix=INNERMOST %s
+! RUN: %flang_fc1 -frepack-arrays-contiguity=innermost %s -emit-hlfir -o - | 
FileCheck --check-prefix=NOREPACK %s
+
+! Default setting is 'innermost':
+! RUN: %flang_fc1 -frepack-arrays %s -emit-hlfir -o - | FileCheck 
--check-prefix=INNERMOST %s
+
+! WHOLECMD: "-fc1"{{.*}}"-frepack-arrays-contiguity=whole"
+! INNERMOSTCMD: "-fc1"{{.*}}"-frepack-arrays-contiguity=innermost"
+
+subroutine test(x)
+  real :: x(:,:)
+  ! WHOLE: fir.pack_array{{.*}}whole
+  ! WHOLE: fir.unpack_array
+  ! INNERMOST: fir.pack_array{{.*}}innermost
+  ! INNERMOST: fir.unpack_array
+  ! NOREPACK-NOT: fir.pack_array
+  ! NOREPACK-NOT: fir.unpack_array
+end subroutine
diff --git a/flang/test/Driver/frepack-arrays.f90 
b/flang/test/Driver/frepack-arrays.f90
new file mode 100644
index 0000000000000..ad96a14f74378
--- /dev/null
+++ b/flang/test/Driver/frepack-arrays.f90
@@ -0,0 +1,24 @@
+! Test just the forwarding of -f[no-]repack-arrays options:
+! RUN: %flang -frepack-arrays %s -### -fsyntax-only 2>&1 | FileCheck 
--check-prefix=REPACKCMD %s
+! RUN: %flang -fno-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck 
--check-prefix=NOREPACKCMD %s
+! RUN: %flang -frepack-arrays -fno-repack-arrays %s -### -fsyntax-only 2>&1 | 
FileCheck --check-prefix=NOREPACKCMD %s
+! RUN: %flang -fno-repack-arrays -frepack-arrays %s -### -fsyntax-only 2>&1 | 
FileCheck --check-prefix=REPACKCMD %s
+
+! Test proper setting of the lowering options:
+! RUN: %flang_fc1 -frepack-arrays %s -emit-hlfir -o - | FileCheck 
--check-prefix=REPACK %s
+! RUN: %flang_fc1 -fno-repack-arrays %s -emit-hlfir -o - | FileCheck 
--check-prefix=NOREPACK %s
+! RUN: %flang_fc1 -frepack-arrays -fno-repack-arrays %s -emit-hlfir -o - | 
FileCheck --check-prefix=NOREPACK %s
+! RUN: %flang_fc1 -fno-repack-arrays -frepack-arrays %s -emit-hlfir -o - | 
FileCheck --check-prefix=REPACK %s
+
+! REPACKCMD: "-fc1"{{.*}}"-frepack-arrays"
+! REPACKCMD-NOT: -fno-repack-arrays
+! NOREPACKCMD: "-fc1"{{.*}}"-fno-repack-arrays"
+! NOREPACKCMD-NOT: -frepack-arrays
+
+subroutine test(x)
+  real :: x(:)
+  ! REPACK: fir.pack_array
+  ! REPACK: fir.unpack_array
+  ! NOREPACK-NOT: fir.pack_array
+  ! NOREPACK-NOT: fir.unpack_array
+end subroutine
diff --git a/flang/test/Driver/fstack-repack-arrays.f90 
b/flang/test/Driver/fstack-repack-arrays.f90
new file mode 100644
index 0000000000000..4fb5aa0c18032
--- /dev/null
+++ b/flang/test/Driver/fstack-repack-arrays.f90
@@ -0,0 +1,24 @@
+! Test just the forwarding of -f[no-]stack-repack-arrays options:
+! RUN: %flang -fstack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck 
--check-prefix=STACKCMD %s
+! RUN: %flang -fno-stack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck 
--check-prefix=HEAPCMD %s
+! RUN: %flang -fstack-repack-arrays -fno-stack-repack-arrays %s -### 
-fsyntax-only 2>&1 | FileCheck --check-prefix=HEAPCMD %s
+! RUN: %flang -fno-stack-repack-arrays -fstack-arrays %s -### -fsyntax-only 
2>&1 | FileCheck --check-prefix=HEAPCMD %s
+! RUN: %flang -fno-stack-repack-arrays -fstack-repack-arrays %s -### 
-fsyntax-only 2>&1 | FileCheck --check-prefix=STACKCMD %s
+
+! Test proper setting of the lowering options:
+! RUN: %flang_fc1 -frepack-arrays -fstack-repack-arrays %s -emit-hlfir -o - | 
FileCheck --check-prefix=STACK %s
+! RUN: %flang_fc1 -frepack-arrays -fno-stack-repack-arrays %s -emit-hlfir -o - 
| FileCheck --check-prefix=HEAP %s
+
+! Default setting is 'heap':
+! RUN: %flang_fc1 -frepack-arrays %s -emit-hlfir -o - | FileCheck 
--check-prefix=HEAP %s
+
+! STACKCMD: "-fc1"{{.*}}"-fstack-repack-arrays"
+! HEAPCMD: "-fc1"{{.*}}"-fno-stack-repack-arrays"
+
+subroutine test(x)
+  real :: x(:,:)
+  ! STACK: fir.pack_array{{.*}}stack
+  ! STACK: fir.unpack_array{{.*}}stack
+  ! HEAP: fir.pack_array{{.*}}heap
+  ! HEAP: fir.unpack_array{{.*}}heap
+end subroutine
diff --git a/flang/test/Lower/repack-arrays.f90 
b/flang/test/Lower/repack-arrays.f90
index 19ea93a3521a3..ff89df82793a3 100644
--- a/flang/test/Lower/repack-arrays.f90
+++ b/flang/test/Lower/repack-arrays.f90
@@ -1,7 +1,7 @@
-! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays 
-frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck 
--check-prefixes=ALL,STACK,WHOLE %s
-! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays=false 
-frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck 
--check-prefixes=ALL,HEAP,WHOLE %s
-! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays 
-frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck 
--check-prefixes=ALL,STACK,INNER %s
-! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays=false 
-frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck 
--check-prefixes=ALL,HEAP,INNER %s
+! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays 
-frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck 
--check-prefixes=ALL,STACK,WHOLE %s
+! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays=false 
-frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck 
--check-prefixes=ALL,HEAP,WHOLE %s
+! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays 
-frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck 
--check-prefixes=ALL,STACK,INNER %s
+! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays=false 
-frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck 
--check-prefixes=ALL,HEAP,INNER %s
 
 ! ALL-LABEL:   func.func @_QPtest1(
 ! ALL-SAME:                        
%[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?xf32>> 
{fir.bindc_name = "x"}) {
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index 2cc75b7aa4e87..4cf9074fc93c4 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -245,15 +245,20 @@ static llvm::cl::opt<bool>
                                  "the LHS of the intrinsic assignment"),
                   llvm::cl::init(true));
 
-// TODO: -fstack-arrays is currently only used for fir.pack_array,
-// but it should probably be used for deciding how arrays/temporaries
-// are allocated during lowering.
+// TODO: -fstack-arrays is currently unused, but it should probably be used
+// for deciding how arrays/temporaries are allocated during lowering.
 static llvm::cl::opt<bool>
     stackArrays("fstack-arrays",
                 llvm::cl::desc("Allocate all arrays of unknown size and "
                                "temporary arrays in stack memory"),
                 llvm::cl::init(false));
 
+static llvm::cl::opt<bool> stackRepackArrays(
+    "fstack-repack-arrays",
+    llvm::cl::desc("Allocate temporary arrays for -frepack-arrays "
+                   "in stack memory"),
+    llvm::cl::init(false));
+
 static llvm::cl::opt<bool>
     repackArrays("frepack-arrays",
                  llvm::cl::desc("Pack non-contiguous assummed shape arrays "
@@ -412,6 +417,7 @@ static llvm::LogicalResult convertFortranSourceToMLIR(
   loweringOptions.setInitGlobalZero(initGlobalZero);
   loweringOptions.setReallocateLHS(reallocateLHS);
   loweringOptions.setStackArrays(stackArrays);
+  loweringOptions.setStackRepackArrays(stackRepackArrays);
   loweringOptions.setRepackArrays(repackArrays);
   loweringOptions.setRepackArraysWhole(repackArraysWhole);
   std::vector<Fortran::lower::EnvironmentDefault> envDefaults = {};

>From fb36b911276c5d2e18ea533d3dc5dee426000265 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakha...@nvidia.com>
Date: Tue, 1 Apr 2025 16:31:10 -0700
Subject: [PATCH 2/2] Fixed sphinx build.

---
 clang/include/clang/Driver/Options.td | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 49a59c0417455..555ac3fbc5b16 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6981,6 +6981,7 @@ may be significant, which may slow down some programs.
 
 Enabling array repacking may also change the behavior of certain
 programs:
+
 * The copy actions may introduce a data race in valid OpenACC/OpenMP programs.
   For example, if different threads execute the same subprogram
   with a non-contiguous assumed shape dummy array, and the different threads

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to