Thanks Thomas!

We've tested it and this brings the startup time back down to a reasonable level!

We'd quite like to see this get into GCC 13.

Regards,
Tamar

> -----Original Message-----
> From: Thomas Neumann <thomas.neum...@in.tum.de>
> Sent: Friday, December 9, 2022 5:34 PM
> To: gcc-patches@gcc.gnu.org
> Cc: H.J. Lu <hjl.to...@gmail.com>; Jakub Jelinek <ja...@redhat.com>;
> Tamar Christina <tamar.christ...@arm.com>; Jason Merrill
> <ja...@redhat.com>; Jonathan Wakely <jwakely....@gmail.com>; Florian
> Weimer <fwei...@redhat.com>
> Subject: [PATCH] initialize fde objects lazily
> 
> When registering an unwind frame with __register_frame_info_bases we
> currently initialize that fde object eagerly. This has the advantage that it
> is immutable afterwards and we can safely access it from multiple threads, but
> it has the disadvantage that we pay the initialization cost even if the
> application never throws an exception.
> 
> This commit changes the logic to initialize the objects lazily.
> The objects themselves are inserted into the b-tree when registering the
> frame, but the sorted fde_vector is not constructed yet. The object is
> initialized only the first time that an exception tries to pass through the
> registered code. We detect that with a double check: first doing a relaxed
> load of the sorted bit, and then re-checking under a mutex if the object was
> not initialized yet.
> 
> Note that the check must implicitly be safe concerning a concurrent frame
> deregistration, as trying to deregister a frame that is on the unwinding path
> of a concurrent exception is inherently racy.
> 
> libgcc/ChangeLog:
>          * unwind-dw2-fde.c: Initialize fde object lazily when
>          the first exception tries to pass through.
> ---
>   libgcc/unwind-dw2-fde.c | 52 ++++++++++++++++++++++++++++++++-----
> ----
>   1 file changed, 41 insertions(+), 11 deletions(-)
> 
> diff --git a/libgcc/unwind-dw2-fde.c b/libgcc/unwind-dw2-fde.c index
> 3c0cc654ec0..6f69c20ff4b 100644
> --- a/libgcc/unwind-dw2-fde.c
> +++ b/libgcc/unwind-dw2-fde.c
> @@ -63,8 +63,6 @@ release_registered_frames (void)
> 
>   static void
>   get_pc_range (const struct object *ob, uintptr_type *range);
> -static void
> -init_object (struct object *ob);
> 
>   #else
>   /* Without fast path frame deregistration must always succeed.  */ @@ -
> 76,6 +74,7 @@ static const int in_shutdown = 0;
>      by decreasing value of pc_begin.  */
>   static struct object *unseen_objects;
>   static struct object *seen_objects;
> +#endif
> 
>   #ifdef __GTHREAD_MUTEX_INIT
>   static __gthread_mutex_t object_mutex = __GTHREAD_MUTEX_INIT; @@
> -103,7 +102,6 @@ init_object_mutex_once (void)
>   static __gthread_mutex_t object_mutex;
>   #endif
>   #endif
> -#endif
> 
>   /* Called from crtbegin.o to register the unwind info for an object.  */
> 
> @@ -126,10 +124,7 @@ __register_frame_info_bases (const void *begin,
> struct object *ob,
>   #endif
> 
>   #ifdef ATOMIC_FDE_FAST_PATH
> -  // Initialize eagerly to avoid locking later
> -  init_object (ob);
> -
> -  // And register the frame
> +  // Register the frame in the b-tree
>     uintptr_type range[2];
>     get_pc_range (ob, range);
>     btree_insert (&registered_frames, range[0], range[1] - range[0], ob); @@
> -180,10 +175,7 @@ __register_frame_info_table_bases (void *begin, struct
> object *ob,
>     ob->s.b.encoding = DW_EH_PE_omit;
> 
>   #ifdef ATOMIC_FDE_FAST_PATH
> -  // Initialize eagerly to avoid locking later
> -  init_object (ob);
> -
> -  // And register the frame
> +  // Register the frame in the b-tree
>     uintptr_type range[2];
>     get_pc_range (ob, range);
>     btree_insert (&registered_frames, range[0], range[1] - range[0], ob); @@
> -892,7 +884,15 @@ init_object (struct object* ob)
>     accu.linear->orig_data = ob->u.single;
>     ob->u.sort = accu.linear;
> 
> +#ifdef ATOMIC_FDE_FAST_PATH
> +  // We must update the sorted bit with an atomic operation
> +  struct object tmp;
> +  tmp.s.b = ob->s.b;
> +  tmp.s.b.sorted = 1;
> +  __atomic_store (&(ob->s.b), &(tmp.s.b), __ATOMIC_SEQ_CST);
> +#else
>     ob->s.b.sorted = 1;
> +#endif
>   }
> 
>   #ifdef ATOMIC_FDE_FAST_PATH
> @@ -1130,6 +1130,21 @@ search_object (struct object* ob, void *pc)
>       }
>   }
> 
> +#ifdef ATOMIC_FDE_FAST_PATH
> +
> +// Check if the object was already initialized
> +static inline bool
> +is_object_initialized (struct object *ob)
> +{
> +  // We have to use relaxed atomics for the read, which
> +  // is a bit involved as we read from a bitfield
> +  struct object tmp;
> +  __atomic_load (&(ob->s.b), &(tmp.s.b), __ATOMIC_RELAXED);
> +  return tmp.s.b.sorted;
> +}
> +
> +#endif
> +
>   const fde *
>   _Unwind_Find_FDE (void *pc, struct dwarf_eh_bases *bases)
>   {
> @@ -1141,6 +1156,21 @@ _Unwind_Find_FDE (void *pc, struct
> dwarf_eh_bases *bases)
>     if (!ob)
>       return NULL;
> 
> +  // Initialize the object lazily
> +  if (!is_object_initialized (ob))
> +    {
> +      // Check again under mutex
> +      init_object_mutex_once ();
> +      __gthread_mutex_lock (&object_mutex);
> +
> +      if (!ob->s.b.sorted)
> +     {
> +       init_object (ob);
> +     }
> +
> +      __gthread_mutex_unlock (&object_mutex);
> +    }
> +
>     f = search_object (ob, pc);
>   #else
> 
> --
> 2.37.2

Reply via email to