From 70d2e852a9013e72b187788c46b0ecf928b93eef Mon Sep 17 00:00:00 2001
From: Christopher Albert <albert@tugraz.at>
Date: Fri, 17 Oct 2025 12:39:28 +0200
Subject: [PATCH] gfortran: runtime deep copy via descriptor walker

---
 gcc/fortran/trans-array.cc                    | 149 +++++++++++-------
 gcc/fortran/trans-decl.cc                     |  19 ++-
 .../alloc_comp_deep_copy_nested_recursive.f90 |  29 ++++
 .../alloc_comp_deep_copy_recursive_array.f90  |  22 +++
 libgfortran/Makefile.in                       |   8 +-
 libgfortran/libgfortran.h                     |   8 +-
 libgfortran/runtime/deep_copy.c               | 131 ++++++++-------
 7 files changed, 241 insertions(+), 125 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/alloc_comp_deep_copy_nested_recursive.f90
 create mode 100644 gcc/testsuite/gfortran.dg/alloc_comp_deep_copy_recursive_array.f90

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index c40b849599c..1ec3fb3503a 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -95,6 +95,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "cgraph.h"     /* For cgraph_node::finalize_function.  */
 #include "gimplify.h"   /* For gimplify_function_tree.  */
 #include "function.h"   /* For push/pop_function_context, allocate_struct_function.  */
+#include "toplev.h"     /* For announce_function, rest_of_decl_compilation.  */
+#include "varasm.h"     /* For make_decl_rtl.  */
 
 static bool gfc_get_array_constructor_size (mpz_t *, gfc_constructor_base);
 
@@ -10041,37 +10043,60 @@ static tree structure_alloc_comps (gfc_symbol *, tree, tree, int, int, int,
    function pointer to the runtime helper _gfortran_cfi_deep_copy_array,
    allowing recursion to happen at runtime instead of compile time.  */
 
+static tree
+get_copy_helper_function_type (void)
+{
+  static tree fn_type = NULL_TREE;
+  if (fn_type == NULL_TREE)
+    fn_type = build_function_type_list (void_type_node,
+					pvoid_type_node,
+					pvoid_type_node,
+					NULL_TREE);
+  return fn_type;
+}
+
+static tree
+get_copy_helper_pointer_type (void)
+{
+  static tree ptr_type = NULL_TREE;
+  if (ptr_type == NULL_TREE)
+    ptr_type = build_pointer_type (get_copy_helper_function_type ());
+  return ptr_type;
+}
+
 static tree
 generate_element_copy_wrapper (gfc_symbol *der_type, tree comp_type,
 				int purpose, int caf_mode,
 				gfc_co_subroutines_args *args)
 {
-  tree fndecl, fntype, retval, saved_function_decl;
+  tree fndecl, fntype, result_decl, saved_function_decl;
   tree dest_parm, src_parm, dest_typed, src_typed;
   tree der_type_ptr;
   stmtblock_t block;
+  tree decls;
+  tree body;
 
-  /* Save current function context.  */
   saved_function_decl = current_function_decl;
   push_function_context ();
 
-  /* Create function type: void fn(void*, void*).  */
-  fntype = build_function_type_list (void_type_node,
-				     pvoid_type_node,  /* dest */
-				     pvoid_type_node,  /* src */
-				     NULL_TREE);
+  fntype = get_copy_helper_function_type ();
 
-  /* Create function declaration.  */
   fndecl = build_decl (input_location, FUNCTION_DECL,
 		       create_tmp_var_name ("copy_element"),
 		       fntype);
 
-  /* Mark as static (internal linkage).  */
   TREE_PUBLIC (fndecl) = 0;
   TREE_STATIC (fndecl) = 1;
+  TREE_USED (fndecl) = 1;
   DECL_ARTIFICIAL (fndecl) = 1;
 
-  /* Create parameter declarations.  */
+  result_decl = build_decl (input_location, RESULT_DECL, NULL_TREE,
+			    void_type_node);
+  DECL_ARTIFICIAL (result_decl) = 1;
+  DECL_IGNORED_P (result_decl) = 1;
+  DECL_CONTEXT (result_decl) = fndecl;
+  DECL_RESULT (fndecl) = result_decl;
+
   dest_parm = build_decl (input_location, PARM_DECL,
 			  get_identifier ("dest"), pvoid_type_node);
   src_parm = build_decl (input_location, PARM_DECL,
@@ -10084,59 +10109,55 @@ generate_element_copy_wrapper (gfc_symbol *der_type, tree comp_type,
   DECL_CONTEXT (dest_parm) = fndecl;
   DECL_CONTEXT (src_parm) = fndecl;
 
-  /* Link parameters.  */
   DECL_ARGUMENTS (fndecl) = dest_parm;
   TREE_CHAIN (dest_parm) = src_parm;
 
-  /* Setup result decl before setting current_function_decl.  */
-  retval = build_decl (input_location, RESULT_DECL, NULL_TREE,
-		       void_type_node);
-  DECL_ARTIFICIAL (retval) = 1;
-  DECL_IGNORED_P (retval) = 1;
-  DECL_CONTEXT (retval) = fndecl;
-  DECL_RESULT (fndecl) = retval;
-
-  /* Setup function context for body generation.  */
   pushdecl (fndecl);
   current_function_decl = fndecl;
+  announce_function (fndecl);
+
+  rest_of_decl_compilation (fndecl, 0, 0);
+  make_decl_rtl (fndecl);
   allocate_struct_function (fndecl, false);
 
-  /* Build function body.  */
+  pushlevel ();
   gfc_init_block (&block);
 
-  /* Cast void* to derived type pointer.  */
   der_type_ptr = build_pointer_type (comp_type);
   dest_typed = fold_convert (der_type_ptr, dest_parm);
   src_typed = fold_convert (der_type_ptr, src_parm);
 
-  /* Dereference to get actual objects.  */
   dest_typed = build_fold_indirect_ref (dest_typed);
   src_typed = build_fold_indirect_ref (src_typed);
 
-  /* Call structure_alloc_comps for element copy (rank=0 for scalar element).  */
-  retval = structure_alloc_comps (der_type, src_typed, dest_typed,
-				  0, purpose, caf_mode, args, false);
+  body = structure_alloc_comps (der_type, src_typed, dest_typed,
+				0, purpose, caf_mode, args, false);
+  gfc_add_expr_to_block (&block, body);
 
-  gfc_add_expr_to_block (&block, retval);
+  body = gfc_finish_block (&block);
+  decls = getdecls ();
 
-  /* Set function body.  */
-  DECL_SAVED_TREE (fndecl) = gfc_finish_block (&block);
+  poplevel (1, 1);
+  BLOCK_SUPERCONTEXT (DECL_INITIAL (fndecl)) = fndecl;
+
+  DECL_SAVED_TREE (fndecl)
+    = fold_build3_loc (DECL_SOURCE_LOCATION (fndecl), BIND_EXPR,
+		       void_type_node, decls, body, DECL_INITIAL (fndecl));
 
-  /* Finalize function.  */
-  cfun->function_end_locus = input_location;
   gimplify_function_tree (fndecl);
 
+  cfun->function_end_locus = input_location;
+  set_cfun (NULL);
+
   if (decl_function_context (fndecl))
     (void) cgraph_node::create (fndecl);
   else
-    cgraph_node::finalize_function (fndecl, false);
+    cgraph_node::finalize_function (fndecl, true);
 
-  /* Restore function context.  */
   pop_function_context ();
   current_function_decl = saved_function_decl;
 
-  /* Return function address.  */
-  return build1 (ADDR_EXPR, build_pointer_type (fntype), fndecl);
+  return build1 (ADDR_EXPR, get_copy_helper_pointer_type (), fndecl);
 }
 
 static tree
@@ -10302,6 +10323,12 @@ structure_alloc_comps (gfc_symbol * der_type, tree decl, tree dest,
 	   && seen_derived_types.contains (c->ts.u.derived))
 	  || (c->ts.type == BT_CLASS
 	      && seen_derived_types.contains (CLASS_DATA (c)->ts.u.derived));
+      bool inside_wrapper
+	= (current_function_decl != NULL
+	   && DECL_ARTIFICIAL (current_function_decl)
+	   && DECL_NAME (current_function_decl) != NULL
+	   && strncmp (IDENTIFIER_POINTER (DECL_NAME (current_function_decl)),
+		       "copy_element", 12) == 0);
 
       bool is_pdt_type = c->ts.type == BT_DERIVED
 			 && c->ts.u.derived->attr.pdt_type;
@@ -10978,24 +11005,18 @@ structure_alloc_comps (gfc_symbol * der_type, tree decl, tree dest,
 					   false, false, NULL_TREE, NULL_TREE);
 	      gfc_add_expr_to_block (&fnblock, tmp);
 	    }
-	  /* Special case: recursive allocatable array components require runtime
-	     helper to avoid compile-time infinite recursion. Generate a call to
-	     _gfortran_cfi_deep_copy_array with an element copy wrapper.
-
-	     IMPORTANT: Only do this in the main function context, not when already
-	     inside a generated wrapper (which would cause infinite wrapper generation).
-	     We detect being inside a wrapper by checking if current_function_decl
-	     has DECL_ARTIFICIAL set and is named "copy_element*".  */
-	  else if (c->attr.allocatable && c->as && cmp_has_alloc_comps && same_type
-		   && purpose == COPY_ALLOC_COMP && !c->attr.proc_pointer
-		   && !c->attr.codimension && !caf_in_coarray (caf_mode)
-		   && c->ts.type == BT_DERIVED && c->ts.u.derived != NULL
-		   && !(current_function_decl != NULL
-			&& DECL_ARTIFICIAL (current_function_decl)
-			&& strncmp (IDENTIFIER_POINTER (DECL_NAME (current_function_decl)),
-				    "copy_element", 12) == 0))
+      /* Special case: recursive allocatable array components require runtime
+	 helper to avoid compile-time infinite recursion. Generate a call to
+	 _gfortran_cfi_deep_copy_array with an element copy wrapper.  */
+      else if (c->attr.allocatable && c->as && cmp_has_alloc_comps && same_type
+	       && purpose == COPY_ALLOC_COMP && !c->attr.proc_pointer
+	       && !c->attr.codimension && !caf_in_coarray (caf_mode)
+	       && c->ts.type == BT_DERIVED && c->ts.u.derived != NULL)
 	    {
-	      tree copy_wrapper, call, dest_addr, src_addr, elem_type;
+      tree copy_wrapper, call, dest_addr, src_addr, elem_type;
+      tree helper_ptr_type;
+      tree alloc_expr;
+      int comp_rank;
 
 	      /* Get the element type from ctype (which is already the component type).
 		 For arrays, we need the element type, not the array type.  */
@@ -11005,13 +11026,23 @@ structure_alloc_comps (gfc_symbol * der_type, tree decl, tree dest,
 	      else if (TREE_CODE (ctype) == ARRAY_TYPE)
 		elem_type = TREE_TYPE (ctype);
 
-	      /* Generate element copy wrapper function. The wrapper will handle
-		 element-wise copy, and since it operates on scalar elements (not arrays),
-		 it won't re-trigger this special case.  */
-	      copy_wrapper = generate_element_copy_wrapper (c->ts.u.derived,
-							    elem_type,
-							    purpose, caf_mode,
-							    args);
+      helper_ptr_type = get_copy_helper_pointer_type ();
+
+      comp_rank = c->as ? c->as->rank : 0;
+      alloc_expr = gfc_duplicate_allocatable_nocopy (dcmp, comp, ctype,
+                                                    comp_rank);
+      gfc_add_expr_to_block (&fnblock, alloc_expr);
+
+	      /* Generate or reuse the element copy helper.  Inside an existing helper
+		 we can reuse the current function to prevent recursive generation.  */
+	      if (inside_wrapper)
+		copy_wrapper = gfc_build_addr_expr (NULL_TREE, current_function_decl);
+	      else
+		copy_wrapper = generate_element_copy_wrapper (c->ts.u.derived,
+							      elem_type,
+							      purpose, caf_mode,
+							      args);
+	      copy_wrapper = fold_convert (helper_ptr_type, copy_wrapper);
 
 	      /* Build addresses of descriptors.  */
 	      dest_addr = gfc_build_addr_expr (pvoid_type_node, dcmp);
diff --git a/gcc/fortran/trans-decl.cc b/gcc/fortran/trans-decl.cc
index d696a14a459..419de2c63cf 100644
--- a/gcc/fortran/trans-decl.cc
+++ b/gcc/fortran/trans-decl.cc
@@ -3591,9 +3591,22 @@ gfc_build_intrinsic_function_decls (void)
     gfc_charlen_type_node, pchar1_type_node, gfc_charlen_type_node,
     gfc_logical4_type_node);
 
-  gfor_fndecl_cfi_deep_copy_array = gfc_build_library_function_decl_with_spec (
-    get_identifier (PREFIX ("cfi_deep_copy_array")), ". R R . ",
-    void_type_node, 3, pvoid_type_node, pvoid_type_node, pvoid_type_node);
+  {
+    tree copy_helper_ptr_type;
+    tree copy_helper_fn_type;
+
+    copy_helper_fn_type = build_function_type_list (void_type_node,
+						    pvoid_type_node,
+						    pvoid_type_node,
+						    NULL_TREE);
+    copy_helper_ptr_type = build_pointer_type (copy_helper_fn_type);
+
+    gfor_fndecl_cfi_deep_copy_array
+      = gfc_build_library_function_decl_with_spec (
+	  get_identifier (PREFIX ("cfi_deep_copy_array")), ". R R . ",
+	  void_type_node, 3, pvoid_type_node, pvoid_type_node,
+	  copy_helper_ptr_type);
+  }
 
   gfor_fndecl_adjustl = gfc_build_library_function_decl_with_spec (
 	get_identifier (PREFIX("adjustl")), ". W . R ",
diff --git a/gcc/testsuite/gfortran.dg/alloc_comp_deep_copy_nested_recursive.f90 b/gcc/testsuite/gfortran.dg/alloc_comp_deep_copy_nested_recursive.f90
new file mode 100644
index 00000000000..8ea5374ea8c
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/alloc_comp_deep_copy_nested_recursive.f90
@@ -0,0 +1,29 @@
+! { dg-do run }
+program alloc_comp_deep_copy_nested_recursive
+  use, intrinsic :: iso_fortran_env, only: dp => real64
+  implicit none
+
+  type :: node_t
+    real(dp), allocatable :: values(:)
+    type(node_t), allocatable :: children(:)
+  end type node_t
+
+  type(node_t) :: src, dst
+
+  allocate (src%values(3))
+  src%values = [ 1.0_dp, 2.0_dp, 3.0_dp ]
+
+  allocate (src%children(1))
+  allocate (src%children(1)%values(2))
+  src%children(1)%values = [ 4.0_dp, 5.0_dp ]
+
+  dst = src
+
+  dst%values(1) = -1.0_dp
+  dst%children(1)%values(1) = -2.0_dp
+
+  if (any(src%values /= [ 1.0_dp, 2.0_dp, 3.0_dp ])) stop 1
+  if (any(src%children(1)%values /= [ 4.0_dp, 5.0_dp ])) stop 2
+  if (any(dst%values /= [ -1.0_dp, 2.0_dp, 3.0_dp ])) stop 3
+  if (any(dst%children(1)%values /= [ -2.0_dp, 5.0_dp ])) stop 4
+end program alloc_comp_deep_copy_nested_recursive
diff --git a/gcc/testsuite/gfortran.dg/alloc_comp_deep_copy_recursive_array.f90 b/gcc/testsuite/gfortran.dg/alloc_comp_deep_copy_recursive_array.f90
new file mode 100644
index 00000000000..7cbd2b29b25
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/alloc_comp_deep_copy_recursive_array.f90
@@ -0,0 +1,22 @@
+! { dg-do run }
+program alloc_comp_deep_copy_recursive_array
+  implicit none
+
+  type :: node_t
+    character(len=10) :: name
+    type(node_t), allocatable :: children(:)
+  end type node_t
+
+  type(node_t) :: src, dst
+
+  src%name = "root"
+  allocate (src%children(2))
+  src%children(1)%name = "child-one"
+  src%children(2)%name = "child-two"
+
+  dst = src
+  dst%children(1)%name = "modified"
+
+  if (src%children(1)%name /= "child-one") stop 1
+  if (dst%children(1)%name /= "modified") stop 2
+end program alloc_comp_deep_copy_recursive_array
diff --git a/libgfortran/Makefile.in b/libgfortran/Makefile.in
index ce828b2f8d0..116e80ffe3c 100644
--- a/libgfortran/Makefile.in
+++ b/libgfortran/Makefile.in
@@ -231,7 +231,7 @@ libgfortran_la_LIBADD =
 @LIBGFOR_MINIMAL_FALSE@	runtime/fpu.lo runtime/main.lo \
 @LIBGFOR_MINIMAL_FALSE@	runtime/pause.lo runtime/stop.lo
 am__objects_3 = runtime/bounds.lo runtime/compile_options.lo \
-	runtime/memory.lo runtime/string.lo runtime/select.lo \
+	runtime/deep_copy.lo runtime/memory.lo runtime/string.lo runtime/select.lo \
 	$(am__objects_1) $(am__objects_2)
 am__objects_4 = generated/matmul_i1.lo generated/matmul_i2.lo \
 	generated/matmul_i4.lo generated/matmul_i8.lo \
@@ -1013,8 +1013,8 @@ gfor_helper_src = intrinsics/associated.c intrinsics/abort.c \
 @IEEE_SUPPORT_TRUE@ieee/ieee_exceptions.F90 \
 @IEEE_SUPPORT_TRUE@ieee/ieee_features.F90
 
-gfor_src = runtime/bounds.c runtime/compile_options.c runtime/memory.c \
-	runtime/string.c runtime/select.c $(am__append_6) \
+gfor_src = runtime/bounds.c runtime/compile_options.c runtime/deep_copy.c \
+	runtime/memory.c runtime/string.c runtime/select.c $(am__append_6) \
 	$(am__append_7)
 i_matmul_c = \
 generated/matmul_i1.c \
@@ -1981,6 +1981,8 @@ runtime/bounds.lo: runtime/$(am__dirstamp) \
 	runtime/$(DEPDIR)/$(am__dirstamp)
 runtime/compile_options.lo: runtime/$(am__dirstamp) \
 	runtime/$(DEPDIR)/$(am__dirstamp)
+runtime/deep_copy.lo: runtime/$(am__dirstamp) \
+	runtime/$(DEPDIR)/$(am__dirstamp)
 runtime/memory.lo: runtime/$(am__dirstamp) \
 	runtime/$(DEPDIR)/$(am__dirstamp)
 runtime/string.lo: runtime/$(am__dirstamp) \
diff --git a/libgfortran/libgfortran.h b/libgfortran/libgfortran.h
index 32dcf749353..22fe75ba89c 100644
--- a/libgfortran/libgfortran.h
+++ b/libgfortran/libgfortran.h
@@ -917,10 +917,10 @@ internal_proto(xrealloc);
 /* deep_copy.c - Runtime helper for recursive allocatable array components */
 
 struct CFI_cdesc_t;
-extern void _gfortran_cfi_deep_copy_array (struct CFI_cdesc_t *,
-					    struct CFI_cdesc_t *,
-					    void (*)(void*, void*));
-export_proto(_gfortran_cfi_deep_copy_array);
+extern void cfi_deep_copy_array (gfc_array_void *,
+                                 gfc_array_void *,
+                                 void (*)(void*, void*));
+export_proto(cfi_deep_copy_array);
 
 /* environ.c */
 
diff --git a/libgfortran/runtime/deep_copy.c b/libgfortran/runtime/deep_copy.c
index ab5c56f720e..3d8af16d040 100644
--- a/libgfortran/runtime/deep_copy.c
+++ b/libgfortran/runtime/deep_copy.c
@@ -23,84 +23,103 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 <http://www.gnu.org/licenses/>.  */
 
 #include "libgfortran.h"
-#include "ISO_Fortran_binding.h"
 #include <string.h>
-#include <stdlib.h>
 
 /* Runtime helper for deep copying allocatable array components when the
-   element type contains nested allocatable components. This handles recursive
-   derived types where compile-time code generation would cause infinite
-   recursion (see GCC PR107489, Bug 114612).
-
-   This function performs element-wise assignment using CFI descriptors,
-   allowing the compiler-generated element copy function to handle nested
-   allocatables recursively at runtime.
-
-   Parameters:
-     dest: CFI descriptor pointer for destination allocatable array component
-     src:  CFI descriptor pointer for source allocatable array component
-     copy_element: Compiler-generated function to perform intrinsic assignment
-                   for a single element (handles nested allocatables)
-*/
+   element type contains nested allocatable components.  The front end handles
+   allocation and deallocation; this helper performs element-wise copies using
+   the compiler-generated element copier so that recursion takes place at
+   runtime.  */
+
+static inline size_t
+descriptor_elem_size (gfc_array_void *desc)
+{
+  size_t size = GFC_DESCRIPTOR_SIZE (desc);
+  return size == 0 ? 1 : size;
+}
+
 void
-_gfortran_cfi_deep_copy_array (CFI_cdesc_t *dest, CFI_cdesc_t *src,
-                                 void (*copy_element)(void*, void*))
+cfi_deep_copy_array (gfc_array_void *dest, gfc_array_void *src,
+                     void (*copy_element)(void*, void*))
 {
-  /* If source is not allocated, ensure destination is deallocated */
-  if (src == NULL || src->base_addr == NULL)
+  int rank;
+  size_t src_elem_size;
+  size_t dest_elem_size;
+  index_type extent[GFC_MAX_DIMENSIONS];
+  index_type src_stride_bytes[GFC_MAX_DIMENSIONS];
+  index_type dest_stride_bytes[GFC_MAX_DIMENSIONS];
+  index_type count[GFC_MAX_DIMENSIONS];
+  char *src_ptr;
+  char *dest_ptr;
+
+  if (src == NULL || dest == NULL)
+    return;
+
+  if (GFC_DESCRIPTOR_DATA (src) == NULL)
     {
-      if (dest != NULL && dest->base_addr != NULL)
-        CFI_deallocate (dest);
+      if (GFC_DESCRIPTOR_DATA (dest) != NULL)
+        internal_error (NULL, "cfi_deep_copy_array: destination must be "
+                              "deallocated when source is not allocated");
       return;
     }
 
-  /* Allocate destination to match source shape */
-  if (dest->base_addr != NULL)
-    CFI_deallocate (dest);
+  if (GFC_DESCRIPTOR_DATA (dest) == NULL)
+    internal_error (NULL, "cfi_deep_copy_array: destination not allocated");
 
-  /* Build lower and upper bounds arrays for allocation */
-  CFI_index_t lower_bounds[CFI_MAX_RANK];
-  CFI_index_t upper_bounds[CFI_MAX_RANK];
+  rank = GFC_DESCRIPTOR_RANK (src);
+  src_elem_size = descriptor_elem_size (src);
+  dest_elem_size = descriptor_elem_size (dest);
 
-  for (int i = 0; i < src->rank; i++)
+  if (rank <= 0)
     {
-      lower_bounds[i] = src->dim[i].lower_bound;
-      upper_bounds[i] = src->dim[i].lower_bound + src->dim[i].extent - 1;
+      memcpy (GFC_DESCRIPTOR_DATA (dest), GFC_DESCRIPTOR_DATA (src),
+              src_elem_size);
+      if (copy_element != NULL)
+        copy_element (GFC_DESCRIPTOR_DATA (dest),
+                      GFC_DESCRIPTOR_DATA (src));
+      return;
     }
 
-  int status = CFI_allocate (dest, lower_bounds, upper_bounds, src->elem_len);
-  if (status != CFI_SUCCESS)
+  for (int dim = 0; dim < rank; dim++)
     {
-      fprintf (stderr, "_gfortran_cfi_deep_copy_array: allocation failed\n");
-      return;
+      extent[dim] = GFC_DESCRIPTOR_EXTENT (src, dim);
+      if (extent[dim] <= 0)
+        return;
+
+      src_stride_bytes[dim]
+        = GFC_DESCRIPTOR_STRIDE (src, dim) * src_elem_size;
+      dest_stride_bytes[dim]
+        = GFC_DESCRIPTOR_STRIDE (dest, dim) * dest_elem_size;
+      count[dim] = 0;
     }
 
-  /* Calculate total number of elements */
-  size_t nelems = 1;
-  for (int i = 0; i < src->rank; i++)
-    nelems *= (size_t)src->dim[i].extent;
+  src_ptr = (char *) GFC_DESCRIPTOR_DATA (src);
+  dest_ptr = (char *) GFC_DESCRIPTOR_DATA (dest);
 
-  /* Copy each element using the compiler-generated assignment function
-     which will handle nested allocatable components recursively */
-  char *src_ptr = (char *)src->base_addr;
-  char *dest_ptr = (char *)dest->base_addr;
-
-  for (size_t idx = 0; idx < nelems; idx++)
+  while (true)
     {
+      memcpy (dest_ptr, src_ptr, src_elem_size);
       if (copy_element != NULL)
+        copy_element (dest_ptr, src_ptr);
+
+      dest_ptr += dest_stride_bytes[0];
+      src_ptr += src_stride_bytes[0];
+      count[0]++;
+
+      int dim = 0;
+      while (count[dim] == extent[dim])
         {
-          /* Use compiler-generated assignment for deep copy of nested components */
-          copy_element (dest_ptr, src_ptr);
-        }
-      else
-        {
-          /* Fallback: shallow copy if no element copier provided */
-          memcpy (dest_ptr, src_ptr, src->elem_len);
+          count[dim] = 0;
+          dest_ptr -= dest_stride_bytes[dim] * extent[dim];
+          src_ptr -= src_stride_bytes[dim] * extent[dim];
+          dim++;
+          if (dim == rank)
+            return;
+          count[dim]++;
+          dest_ptr += dest_stride_bytes[dim];
+          src_ptr += src_stride_bytes[dim];
         }
-
-      src_ptr += src->elem_len;
-      dest_ptr += dest->elem_len;
     }
 }
 
-export_proto(_gfortran_cfi_deep_copy_array);
+export_proto(cfi_deep_copy_array);
-- 
2.51.0

