Hi,

this transformation has been in our tree for a couple of years and was 
originally developed for SPARKSkein (http://www.skein-hash.info/node/48), 
which is the implementation in SPARK (subset of Ada) of the Skein algorithm.

When nested functions access local variables of their parent, the compiler 
creates a special FRAME local variable in the parent, which represents the 
non-local frame, and puts into it all the variables accessed non-locally.

If these nested functions are later inlined into their parent, these FRAME 
variables generally remain unmodified and this has various drawbacks:
 1) the frame of the parent is unnecessarily large,
 2) scalarization of aggregates put into the FRAME variables is hindered,
 3) debug info for scalars put into the FRAME variables is poor since VTA only 
works on GIMPLE registers.

The attached patch makes it so that the compiler splits FRAME variables back 
into pieces when all the nested functions have been inlined.  The natural 
place to implement the transformation would probably be the SRA pass, but this 
would require a special path to work around all the heuristics and the pass is 
already complicated enough (sorry Martin ;-)  The transformation is therefore 
implemented as a sub-pass of execute_update_addresses_taken for technical 
reasons exposed in the patch.

Tested on x86-64/Linux, OK for the mainline?


2012-09-19  Eric Botcazou  <ebotca...@adacore.com>

        * tree.h (DECL_NONLOCAL_FRAME): New macro.
        * gimple.c (gimple_ior_addresses_taken_1): Handle non-local frame
        structures specially.
        * tree-nested.c (get_frame_type): Set DECL_NONLOCAL_FRAME.
        * tree-ssa.c (lookup_decl_for_field): New static function.
        (split_nonlocal_frames_op): Likewise.
        (execute_update_addresses_taken): Break up non-local frame structures
        into variables when possible.
        * tree-streamer-in.c (unpack_ts_decl_common_value_fields): Stream in
        DECL_NONLOCAL_FRAME flag.
        * tree-streamer-out.c (pack_ts_decl_common_value_fields): Stream out
        DECL_NONLOCAL_FRAME flag.


2012-09-19  Eric Botcazou  <ebotca...@adacore.com>

        * gcc.dg/nested-func-9.c: New test.


-- 
Eric Botcazou
Index: tree.h
===================================================================
--- tree.h	(revision 191365)
+++ tree.h	(working copy)
@@ -712,6 +712,9 @@ struct GTY(()) tree_base {
 
        SSA_NAME_IS_DEFAULT_DEF in
            SSA_NAME
+
+       DECL_NONLOCAL_FRAME in
+	   VAR_DECL
 */
 
 struct GTY(()) tree_typed {
@@ -3268,9 +3271,14 @@ extern void decl_fini_priority_insert (t
    libraries.  */
 #define MAX_RESERVED_INIT_PRIORITY 100
 
+/* In a VAR_DECL, nonzero if this is a global variable for VOPs.  */
 #define VAR_DECL_IS_VIRTUAL_OPERAND(NODE) \
   (VAR_DECL_CHECK (NODE)->base.u.bits.saturating_flag)
 
+/* In a VAR_DECL, nonzero if this is a non-local frame structure.  */
+#define DECL_NONLOCAL_FRAME(NODE)  \
+  (VAR_DECL_CHECK (NODE)->base.default_def_flag)
+
 struct GTY(()) tree_var_decl {
   struct tree_decl_with_vis common;
 };
Index: tree-streamer-out.c
===================================================================
--- tree-streamer-out.c	(revision 191365)
+++ tree-streamer-out.c	(working copy)
@@ -181,6 +181,9 @@ pack_ts_decl_common_value_fields (struct
       bp_pack_value (bp, expr->decl_common.off_align, 8);
     }
 
+  if (TREE_CODE (expr) == VAR_DECL)
+    bp_pack_value (bp, DECL_NONLOCAL_FRAME (expr), 1);
+
   if (TREE_CODE (expr) == RESULT_DECL
       || TREE_CODE (expr) == PARM_DECL
       || TREE_CODE (expr) == VAR_DECL)
Index: tree-nested.c
===================================================================
--- tree-nested.c	(revision 191365)
+++ tree-nested.c	(working copy)
@@ -235,6 +235,7 @@ get_frame_type (struct nesting_info *inf
 
       info->frame_type = type;
       info->frame_decl = create_tmp_var_for (info, type, "FRAME");
+      DECL_NONLOCAL_FRAME (info->frame_decl) = 1;
 
       /* ??? Always make it addressable for now, since it is meant to
 	 be pointed to by the static chain pointer.  This pessimizes
Index: tree-ssa.c
===================================================================
--- tree-ssa.c	(revision 191365)
+++ tree-ssa.c	(working copy)
@@ -1930,6 +1930,152 @@ maybe_optimize_var (tree var, bitmap add
     }
 }
 
+
+struct walk_info_data
+{
+  /* Map of fields in non-local frame structures to variables.  */
+  struct pointer_map_t *field_map;
+
+  /* Bitmap of variables whose address is taken.  */
+  bitmap addresses_taken;
+
+  /* Bitmap of variables to be renamed.  */
+  bitmap suitable_for_renaming;
+};
+
+/* Given FIELD, a field in a non-local frame structure, find or create a
+   variable in the current function and register it with MAP.  This is
+   the reverse function of tree-nested.c:lookup_field_for_decl.  */
+
+static tree
+lookup_decl_for_field (tree field, struct pointer_map_t *map,
+		       bitmap suitable_for_renaming)
+{
+  void **slot = pointer_map_insert (map, field);
+  if (!*slot)
+    {
+      tree decl =  build_decl (DECL_SOURCE_LOCATION (field), VAR_DECL,
+			       DECL_NAME (field), TREE_TYPE (field));
+      /* Some targets limit alignment of fields to a lower boundary than
+         that of variables unless it was overridden by attribute aligned.  */
+      if (DECL_USER_ALIGN (field))
+	{
+	  DECL_ALIGN (decl) = DECL_ALIGN (field);
+	  DECL_USER_ALIGN (decl) = 1;
+	}
+      else
+	DECL_ALIGN (decl) = TYPE_ALIGN (TREE_TYPE (field));
+      TREE_ADDRESSABLE (decl) = !DECL_NONADDRESSABLE_P (field);
+      TREE_THIS_VOLATILE (decl) = TREE_THIS_VOLATILE (field);
+      TREE_USED (decl) = 1;
+      gimple_add_tmp_var (decl);
+      if (!TREE_ADDRESSABLE (decl) && is_gimple_reg (decl))
+	bitmap_set_bit (suitable_for_renaming, DECL_UID (decl));
+      *slot = decl;
+    }
+
+  return (tree) *slot;
+}
+
+/* Callback for walk_gimple_op.  Replace component and memory references to
+   non-local frame structures pointed to by TP with individual variables.  */
+
+static tree
+split_nonlocal_frames_op (tree *tp, int *walk_subtrees, void *info)
+{
+  struct walk_info_data *data
+    = (struct walk_info_data *) ((struct walk_stmt_info *)info)->info;
+  bool address_taken;
+
+  if (TREE_CODE (*tp) == ADDR_EXPR)
+    {
+      tp = &TREE_OPERAND (*tp, 0);
+      address_taken = true;
+    }
+  else if (TREE_CODE (*tp) == TARGET_MEM_REF
+	   && TREE_CODE (TMR_BASE (*tp)) == ADDR_EXPR)
+    {
+      tp = &TREE_OPERAND (TMR_BASE (*tp), 0);
+      address_taken = true;
+    }
+  else if (TREE_CODE (*tp) == OBJ_TYPE_REF
+	   && TREE_CODE (OBJ_TYPE_REF_OBJECT (*tp)) == ADDR_EXPR)
+    {
+      tp = &TREE_OPERAND (OBJ_TYPE_REF_OBJECT (*tp), 0);
+      address_taken = true;
+    }
+  else
+    address_taken = false;
+
+  for (; handled_component_p (*tp); tp = &TREE_OPERAND (*tp, 0))
+    if (TREE_CODE (*tp) == COMPONENT_REF)
+      {
+	tree base = TREE_OPERAND (*tp, 0);
+
+	/* Deal with transparent MEM_REFs around the base.  */
+	if (TREE_CODE (base) == MEM_REF
+	    && TREE_CODE (TREE_OPERAND (base, 0)) == ADDR_EXPR
+	    && DECL_CONTEXT (TREE_OPERAND (*tp, 1))
+	       == TYPE_MAIN_VARIANT
+		  (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (base, 0), 0))))
+	  {
+	    gcc_assert (integer_zerop (TREE_OPERAND (base, 1)));
+	    base = TREE_OPERAND (TREE_OPERAND (base, 0), 0);
+	  }
+
+	if (TREE_CODE (base) == VAR_DECL
+	    && DECL_NONLOCAL_FRAME (base)
+	    && !bitmap_bit_p (data->addresses_taken, DECL_UID (base)))
+	  {
+	    tree decl
+	      = lookup_decl_for_field (TREE_OPERAND (*tp, 1), data->field_map,
+				       data->suitable_for_renaming);
+	    if (address_taken)
+	      bitmap_set_bit (data->addresses_taken, DECL_UID (decl));
+	    *tp = decl;
+	    break;
+	  }
+      }
+
+  /* Deal with remaining MEM_REFs, i.e. those for which the field reference
+     has been replaced with the offset.  */
+  if (TREE_CODE (*tp) == MEM_REF
+      && TREE_CODE (TREE_OPERAND (*tp, 0)) == ADDR_EXPR)
+    {
+      tree base = TREE_OPERAND (TREE_OPERAND (*tp, 0), 0);
+
+      if (TREE_CODE (base) == VAR_DECL
+	  && DECL_NONLOCAL_FRAME (base)
+	  && !bitmap_bit_p (data->addresses_taken, DECL_UID (base)))
+	{
+	  tree frame_type = TREE_TYPE (base);
+	  tree offset = TREE_OPERAND (*tp, 1);
+	  tree field, next, decl, addr;
+
+	  for (field = TYPE_FIELDS (frame_type), next = DECL_CHAIN (field);
+	       next;
+	       field = next, next = DECL_CHAIN (next))
+	    if (tree_int_cst_lt (offset, byte_position (next)))
+	      break;
+
+	  decl = lookup_decl_for_field (field, data->field_map,
+					data->suitable_for_renaming);
+	  if (address_taken)
+	    bitmap_set_bit (data->addresses_taken, DECL_UID (decl));
+
+	  addr = build_fold_addr_expr (decl);
+	  if (TREE_TYPE (offset) == TREE_TYPE (TREE_OPERAND (*tp, 0)))
+	    offset = fold_convert (TREE_TYPE (addr), offset);
+	  *tp = fold_build2 (MEM_REF, TREE_TYPE (*tp), addr,
+			     int_const_binop (MINUS_EXPR, offset,
+					      byte_position (field)));
+	}
+    }
+
+  *walk_subtrees = 0;
+  return NULL_TREE;
+}
+
 /* Compute TREE_ADDRESSABLE and DECL_GIMPLE_REG_P for local variables.  */
 
 void
@@ -1940,6 +2086,7 @@ execute_update_addresses_taken (void)
   bitmap addresses_taken = BITMAP_ALLOC (NULL);
   bitmap not_reg_needs = BITMAP_ALLOC (NULL);
   bitmap suitable_for_renaming = BITMAP_ALLOC (NULL);
+  bool split_nonlocal_frames = false;
   tree var;
   unsigned i;
 
@@ -2034,6 +2181,38 @@ execute_update_addresses_taken (void)
 	}
     }
 
+  /* If non-local frame structures don't have their address taken as a whole,
+     they can be broken up back into variables since they are never accessed
+     directly as a whole.  */
+  FOR_EACH_VEC_ELT (tree, cfun->local_decls, i, var)
+    if (TREE_CODE (var) == VAR_DECL
+	&& DECL_NONLOCAL_FRAME (var)
+	&& !bitmap_bit_p (addresses_taken, DECL_UID (var)))
+      {
+	split_nonlocal_frames = true;
+	break;
+      }
+
+  /* Break up non-local frame structures when possible.  This will in turn
+     expose more scalarization opportunities for subsequent SRA passes.  */
+  if (split_nonlocal_frames)
+    {
+      struct walk_stmt_info wi;
+      struct walk_info_data data;
+
+      data.field_map = pointer_map_create ();
+      data.addresses_taken = addresses_taken;
+      data.suitable_for_renaming = suitable_for_renaming;
+      memset (&wi, 0, sizeof (wi));
+      wi.info = &data;
+
+      FOR_EACH_BB (bb)
+	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	  walk_gimple_op (gsi_stmt (gsi), split_nonlocal_frames_op, &wi);
+
+      pointer_map_destroy (data.field_map);
+    }
+
   /* We cannot iterate over all referenced vars because that can contain
      unused vars from BLOCK trees, which causes code generation differences
      for -g vs. -g0.  */
Index: tree-streamer-in.c
===================================================================
--- tree-streamer-in.c	(revision 191365)
+++ tree-streamer-in.c	(working copy)
@@ -217,6 +217,9 @@ unpack_ts_decl_common_value_fields (stru
       expr->decl_common.off_align = bp_unpack_value (bp, 8);
     }
 
+  if (TREE_CODE (expr) == VAR_DECL)
+    DECL_NONLOCAL_FRAME (expr) = (unsigned) bp_unpack_value (bp, 1);
+
   if (TREE_CODE (expr) == RESULT_DECL
       || TREE_CODE (expr) == PARM_DECL
       || TREE_CODE (expr) == VAR_DECL)
Index: gimple.c
===================================================================
--- gimple.c	(revision 191365)
+++ gimple.c	(working copy)
@@ -4071,11 +4071,16 @@ gimple_ior_addresses_taken_1 (gimple stm
 			      tree addr, void *data)
 {
   bitmap addresses_taken = (bitmap)data;
-  addr = get_base_address (addr);
-  if (addr
-      && DECL_P (addr))
+  tree base = get_base_address (addr);
+  if (base && DECL_P (base))
     {
-      bitmap_set_bit (addresses_taken, DECL_UID (addr));
+      /* Require the address of the whole object to be taken for non-local
+	 frame structures.  */
+      if (TREE_CODE (base) == VAR_DECL
+	  && DECL_NONLOCAL_FRAME (base)
+	  && base != addr)
+	return false;
+      bitmap_set_bit (addresses_taken, DECL_UID (base));
       return true;
     }
   return false;
/* { dg-do compile } */
/* { dg-options "-O -fdump-tree-optimized" } */

struct S
{
  int a,b,c,d;
};

int eval (struct S s)
{
  int arr[16];
  int a = s.a;
  int b = s.b;
  int c = s.c;
  int d = s.d;
  int i;

  int do_eval (void)
  {
    int acc = 0;
    for (i = 0; i < 16; i++)
      acc += arr[i];
    return a + b + c + d + acc;
  }

  for (i = 0; i < 16; i++)
    arr[i] = i;

  return do_eval ();
}

/* { dg-final { scan-tree-dump-not "FRAME" "optimized" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */

Reply via email to