From bfb1e356e7e6848736218608eca953569361cf18 Mon Sep 17 00:00:00 2001
From: Francois-Xavier Coudert <fxcoudert@gcc.gnu.org>
Date: Sat, 5 Aug 2023 14:54:11 +0200
Subject: [PATCH 1/3] core: Support heap-based trampolines

Generate heap-based nested function trampolines

Add support for allocating nested function trampolines on an
executable heap rather than on the stack. This is motivated by targets
such as AArch64 Darwin, which globally prohibit executing code on the
stack.

The target-specific routines for allocating and writing trampolines are
to be provided in libgcc.

The gcc flag -ftrampoline-impl=[stack|heap] controls whether to generate
code that instantiates trampolines on the stack, or to emit calls to
__builtin_nested_func_ptr_created and
__builtin_nested_func_ptr_deleted. Note that this flag is completely
independent of libgcc: the compiler does not check that libgcc provides
those symbols, so if they are missing for any reason you will get a link
failure.

This implementation imposes some implicit restrictions as compared to
stack trampolines. longjmp'ing back to a state before a trampoline was
created will cause us to skip over the corresponding
__builtin_nested_func_ptr_deleted, which will leak trampolines
starting from the beginning of the linked list of allocated
trampolines. There may be scope for instrumenting longjmp/setjmp to
trigger cleanups of trampolines.

Co-Authored-By: Andrew Burgess <andrew.burgess@embecosm.com>
Co-Authored-By: Iain Sandoe <iain@sandoe.co.uk>

gcc/ChangeLog:

	* builtins.def (BUILT_IN_NESTED_PTR_CREATED): Define.
	(BUILT_IN_NESTED_PTR_DELETED): Ditto.
	* common.opt (ftrampoline-impl): Add option to control
	generation of trampoline instantiation (heap or stack).
	* coretypes.h: Define enum trampoline_impl.
	* tree-nested.cc (convert_tramp_reference_op): Don't bother calling
	__builtin_adjust_trampoline for heap trampolines.
	(finalize_nesting_tree_1): Emit calls to
	__builtin_nested_...{created,deleted} if we're generating with
	-ftrampoline-impl=heap.
	* tree.cc (build_common_builtin_nodes): Build
	__builtin_nested_...{created,deleted}.
	* doc/invoke.texi (-ftrampoline-impl): Document.
---
 gcc/builtins.def    |   2 +
 gcc/common.opt      |  17 ++++++-
 gcc/coretypes.h     |   6 +++
 gcc/doc/invoke.texi |  17 ++++++-
 gcc/tree-nested.cc  | 121 +++++++++++++++++++++++++++++++++++++-------
 gcc/tree.cc         |  17 +++++++
 6 files changed, 161 insertions(+), 19 deletions(-)

diff --git a/gcc/builtins.def b/gcc/builtins.def
index 5953266acba..7a7987100d1 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -1074,6 +1074,8 @@ DEF_BUILTIN_STUB (BUILT_IN_ADJUST_TRAMPOLINE, "__builtin_adjust_trampoline")
 DEF_BUILTIN_STUB (BUILT_IN_INIT_DESCRIPTOR, "__builtin_init_descriptor")
 DEF_BUILTIN_STUB (BUILT_IN_ADJUST_DESCRIPTOR, "__builtin_adjust_descriptor")
 DEF_BUILTIN_STUB (BUILT_IN_NONLOCAL_GOTO, "__builtin_nonlocal_goto")
+DEF_BUILTIN_STUB (BUILT_IN_NESTED_PTR_CREATED, "__builtin_nested_func_ptr_created")
+DEF_BUILTIN_STUB (BUILT_IN_NESTED_PTR_DELETED, "__builtin_nested_func_ptr_deleted")
 
 /* Implementing __builtin_setjmp.  */
 DEF_BUILTIN_STUB (BUILT_IN_SETJMP_SETUP, "__builtin_setjmp_setup")
diff --git a/gcc/common.opt b/gcc/common.opt
index 0888c15b88f..949307a4414 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2884,10 +2884,25 @@ Common Var(flag_tracer) Optimization
 Perform superblock formation via tail duplication.
 
 ftrampolines
-Common Var(flag_trampolines) Init(0)
+Common Var(flag_trampolines) Init(HEAP_TRAMPOLINES_INIT)
 For targets that normally need trampolines for nested functions, always
 generate them instead of using descriptors.
 
+ftrampoline-impl=
+Common Joined RejectNegative Enum(trampoline_impl) Var(flag_trampoline_impl) Init(HEAP_TRAMPOLINES_INIT ? TRAMPOLINE_IMPL_HEAP : TRAMPOLINE_IMPL_STACK)
+Whether trampolines are generated in executable memory rather than
+executable stack.
+
+Enum
+Name(trampoline_impl) Type(enum trampoline_impl) UnknownError(unknown trampoline implementation %qs)
+
+EnumValue
+Enum(trampoline_impl) String(stack) Value(TRAMPOLINE_IMPL_STACK)
+
+EnumValue
+Enum(trampoline_impl) String(heap) Value(TRAMPOLINE_IMPL_HEAP)
+
+
 ; Zero means that floating-point math operations cannot generate a
 ; (user-visible) trap.  This is the case, for example, in nonstop
 ; IEEE 754 arithmetic.
diff --git a/gcc/coretypes.h b/gcc/coretypes.h
index ca8837cef67..7e022a427c4 100644
--- a/gcc/coretypes.h
+++ b/gcc/coretypes.h
@@ -199,6 +199,12 @@ enum tls_model {
   TLS_MODEL_LOCAL_EXEC
 };
 
+/* Types of trampoline implementation.  */
+enum trampoline_impl {
+  TRAMPOLINE_IMPL_STACK,
+  TRAMPOLINE_IMPL_HEAP
+};
+
 /* Types of ABI for an offload compiler.  */
 enum offload_abi {
   OFFLOAD_ABI_UNSET,
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 674f956f4b8..13e13728621 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -711,7 +711,8 @@ Objective-C and Objective-C++ Dialects}.
 -fverbose-asm  -fpack-struct[=@var{n}]
 -fleading-underscore  -ftls-model=@var{model}
 -fstack-reuse=@var{reuse_level}
--ftrampolines  -ftrapv  -fwrapv
+-ftrampolines -ftrampoline-impl=@r{[}stack@r{|}heap@r{]}
+-ftrapv  -fwrapv
 -fvisibility=@r{[}default@r{|}internal@r{|}hidden@r{|}protected@r{]}
 -fstrict-volatile-bitfields  -fsync-libcalls}
 
@@ -18834,6 +18835,20 @@ For languages other than Ada, the @code{-ftrampolines} and
 trampolines are always generated on platforms that need them
 for nested functions.
 
+@opindex ftrampoline-impl
+@item -ftrampoline-impl=@r{[}stack@r{|}heap@r{]}
+By default, trampolines are generated on the stack.  However, certain
+platforms (such as the Apple M1) do not permit an executable stack.  Compiling
+with @option{-ftrampoline-impl=heap} generates calls to
+@code{__builtin_nested_func_ptr_created} and
+@code{__builtin_nested_func_ptr_deleted} in order to allocate and
+deallocate trampoline space on the executable heap.  These functions are
+implemented in libgcc, and will only be provided on specific targets:
+x86_64 Darwin, x86_64 and aarch64 Linux.  @emph{PLEASE NOTE}: Heap
+trampolines are @emph{not} guaranteed to be correctly deallocated if you
+@code{setjmp}, instantiate nested functions, and then @code{longjmp} back
+to a state prior to having allocated those nested functions.
+
 @opindex fvisibility
 @item -fvisibility=@r{[}default@r{|}internal@r{|}hidden@r{|}protected@r{]}
 Set the default ELF image symbol visibility to the specified option---all
diff --git a/gcc/tree-nested.cc b/gcc/tree-nested.cc
index ae7d1f1f6a8..84ee9962485 100644
--- a/gcc/tree-nested.cc
+++ b/gcc/tree-nested.cc
@@ -611,6 +611,14 @@ get_trampoline_type (struct nesting_info *info)
   if (trampoline_type)
     return trampoline_type;
 
+  /* When trampolines are created off-stack then the only thing we need in the
+     local frame is a single pointer.  */
+  if (flag_trampoline_impl == TRAMPOLINE_IMPL_HEAP)
+    {
+      trampoline_type = build_pointer_type (void_type_node);
+      return trampoline_type;
+    }
+
   align = TRAMPOLINE_ALIGNMENT;
   size = TRAMPOLINE_SIZE;
 
@@ -2788,17 +2796,27 @@ convert_tramp_reference_op (tree *tp, int *walk_subtrees, void *data)
 
       /* Compute the address of the field holding the trampoline.  */
       x = get_frame_field (info, target_context, x, &wi->gsi);
-      x = build_addr (x);
-      x = gsi_gimplify_val (info, x, &wi->gsi);
 
-      /* Do machine-specific ugliness.  Normally this will involve
-	 computing extra alignment, but it can really be anything.  */
-      if (descr)
-	builtin = builtin_decl_implicit (BUILT_IN_ADJUST_DESCRIPTOR);
+      /* APB: We don't need to do the adjustment calls when using off-stack
+	 trampolines; any such adjustment will be done when the off-stack
+	 trampoline is created.  */
+      if (!descr && flag_trampoline_impl == TRAMPOLINE_IMPL_HEAP)
+	x = gsi_gimplify_val (info, x, &wi->gsi);
       else
-	builtin = builtin_decl_implicit (BUILT_IN_ADJUST_TRAMPOLINE);
-      call = gimple_build_call (builtin, 1, x);
-      x = init_tmp_var_with_call (info, &wi->gsi, call);
+	{
+	  x = build_addr (x);
+
+	  x = gsi_gimplify_val (info, x, &wi->gsi);
+
+	  /* Do machine-specific ugliness.  Normally this will involve
+	     computing extra alignment, but it can really be anything.  */
+	  if (descr)
+	    builtin = builtin_decl_implicit (BUILT_IN_ADJUST_DESCRIPTOR);
+	  else
+	    builtin = builtin_decl_implicit (BUILT_IN_ADJUST_TRAMPOLINE);
+	  call = gimple_build_call (builtin, 1, x);
+	  x = init_tmp_var_with_call (info, &wi->gsi, call);
+	}
 
       /* Cast back to the proper function type.  */
       x = build1 (NOP_EXPR, TREE_TYPE (t), x);
@@ -3377,6 +3395,7 @@ build_init_call_stmt (struct nesting_info *info, tree decl, tree field,
 static void
 finalize_nesting_tree_1 (struct nesting_info *root)
 {
+  gimple_seq cleanup_list = NULL;
   gimple_seq stmt_list = NULL;
   gimple *stmt;
   tree context = root->context;
@@ -3508,9 +3527,48 @@ finalize_nesting_tree_1 (struct nesting_info *root)
 	  if (!field)
 	    continue;
 
-	  x = builtin_decl_implicit (BUILT_IN_INIT_TRAMPOLINE);
-	  stmt = build_init_call_stmt (root, i->context, field, x);
-	  gimple_seq_add_stmt (&stmt_list, stmt);
+	  if (flag_trampoline_impl == TRAMPOLINE_IMPL_HEAP)
+	    {
+	      /* We pass several arguments to the builtin function that creates
+		 the off-stack trampoline.  These are:
+		 1. The nested function chain value (that must be passed to the
+		 nested function so it can find the function arguments).
+		 2. A pointer to the nested function implementation.
+		 3. The address in the local stack frame where we should write
+		 the address of the trampoline.
+
+		 Historical note: when this code was originally written,
+		 everything was thrown at the builtin, to work out later what was
+		 actually needed.  The remark that followed (about dropping the
+		 stack pointer, "#1", and about "#2 and #4") does not match the
+		 three arguments listed above and appears to be stale.  */
+	      tree arg1, arg2, arg3;
+
+	      gcc_assert (DECL_STATIC_CHAIN (i->context));
+	      arg1 = build_addr (root->frame_decl);
+	      arg2 = build_addr (i->context);
+
+	      x = build3 (COMPONENT_REF, TREE_TYPE (field),
+			  root->frame_decl, field, NULL_TREE);
+	      arg3 = build_addr (x);
+
+	      x = builtin_decl_implicit (BUILT_IN_NESTED_PTR_CREATED);
+	      stmt = gimple_build_call (x, 3, arg1, arg2, arg3);
+	      gimple_seq_add_stmt (&stmt_list, stmt);
+
+	      /* This call to delete the nested function trampoline is added to
+		 the cleanup list, and called when we exit the current scope.  */
+	      x = builtin_decl_implicit (BUILT_IN_NESTED_PTR_DELETED);
+	      stmt = gimple_build_call (x, 0);
+	      gimple_seq_add_stmt (&cleanup_list, stmt);
+	    }
+	  else
+	    {
+	      /* Original code to initialise the on stack trampoline.  */
+	      x = builtin_decl_implicit (BUILT_IN_INIT_TRAMPOLINE);
+	      stmt = build_init_call_stmt (root, i->context, field, x);
+	      gimple_seq_add_stmt (&stmt_list, stmt);
+	    }
 	}
     }
 
@@ -3535,11 +3593,40 @@ finalize_nesting_tree_1 (struct nesting_info *root)
   /* If we created initialization statements, insert them.  */
   if (stmt_list)
     {
-      gbind *bind;
-      annotate_all_with_location (stmt_list, DECL_SOURCE_LOCATION (context));
-      bind = gimple_seq_first_stmt_as_a_bind (gimple_body (context));
-      gimple_seq_add_seq (&stmt_list, gimple_bind_body (bind));
-      gimple_bind_set_body (bind, stmt_list);
+      if (flag_trampoline_impl == TRAMPOLINE_IMPL_HEAP)
+	{
+	  /* Handle off-stack trampolines.  */
+	  gbind *bind;
+	  annotate_all_with_location (stmt_list, DECL_SOURCE_LOCATION (context));
+	  annotate_all_with_location (cleanup_list, DECL_SOURCE_LOCATION (context));
+	  bind = gimple_seq_first_stmt_as_a_bind (gimple_body (context));
+	  gimple_seq_add_seq (&stmt_list, gimple_bind_body (bind));
+
+	  gimple_seq xxx_list = NULL;
+
+	  if (cleanup_list != NULL)
+	    {
+	      /* Maybe we shouldn't be creating this try/finally if -fno-exceptions is
+		 in use.  If this is the case, then maybe we should, instead, be
+		 inserting the cleanup code onto every path out of this function?  Not
+		 yet figured out how we would do this.  */
+	      gtry *t = gimple_build_try (stmt_list, cleanup_list, GIMPLE_TRY_FINALLY);
+	      gimple_seq_add_stmt (&xxx_list, t);
+	    }
+	  else
+	    xxx_list = stmt_list;
+
+	  gimple_bind_set_body (bind, xxx_list);
+	}
+      else
+	{
+	  /* The traditional, on stack trampolines.  */
+	  gbind *bind;
+	  annotate_all_with_location (stmt_list, DECL_SOURCE_LOCATION (context));
+	  bind = gimple_seq_first_stmt_as_a_bind (gimple_body (context));
+	  gimple_seq_add_seq (&stmt_list, gimple_bind_body (bind));
+	  gimple_bind_set_body (bind, stmt_list);
+	}
     }
 
   /* If a chain_decl was created, then it needs to be registered with
diff --git a/gcc/tree.cc b/gcc/tree.cc
index 420857b110c..3e7beba8744 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -9870,6 +9870,23 @@ build_common_builtin_nodes (void)
 			"__builtin_nonlocal_goto",
 			ECF_NORETURN | ECF_NOTHROW);
 
+  tree ptr_ptr_type_node = build_pointer_type (ptr_type_node);
+
+  ftype = build_function_type_list (void_type_node,
+				    ptr_type_node, // void *chain
+				    ptr_type_node, // void *func
+				    ptr_ptr_type_node, // void **dst
+				    NULL_TREE);
+  local_define_builtin ("__builtin_nested_func_ptr_created", ftype,
+			BUILT_IN_NESTED_PTR_CREATED,
+			"__builtin_nested_func_ptr_created", ECF_NOTHROW);
+
+  ftype = build_function_type_list (void_type_node,
+				    NULL_TREE);
+  local_define_builtin ("__builtin_nested_func_ptr_deleted", ftype,
+			BUILT_IN_NESTED_PTR_DELETED,
+			"__builtin_nested_func_ptr_deleted", ECF_NOTHROW);
+
   ftype = build_function_type_list (void_type_node,
 				    ptr_type_node, ptr_type_node, NULL_TREE);
   local_define_builtin ("__builtin_setjmp_setup", ftype,
-- 
2.39.2 (Apple Git-143)

