Thanks Thomas! We've tested it and this brings the startup time back to a reasonable level!
We'd quite like to see this get into GCC 13. Regards, Tamar > -----Original Message----- > From: Thomas Neumann <thomas.neum...@in.tum.de> > Sent: Friday, December 9, 2022 5:34 PM > To: gcc-patches@gcc.gnu.org > Cc: H.J. Lu <hjl.to...@gmail.com>; Jakub Jelinek <ja...@redhat.com>; > Tamar Christina <tamar.christ...@arm.com>; Jason Merrill > <ja...@redhat.com>; Jonathan Wakely <jwakely....@gmail.com>; Florian > Weimer <fwei...@redhat.com> > Subject: [PATCH] initialize fde objects lazily > > When registering an unwind frame with __register_frame_info_bases we > currently initialize that fde object eagerly. This has the advantage that it > is > immutable afterwards and we can safely access it from multiple threads, but > it has the disadvantage that we pay the initialization cost even if the > application never throws an exception. > > This commit changes the logic to initialize the objects lazily. > The objects themselves are inserted into the b-tree when registering the > frame, but the sorted fde_vector is not constructed yet. Only on the first > time that an exception tries to pass through the registered code the object is > initialized. We notice that with double checking, first doing a relaxed > load of > the sorted bit and then re-checking under a mutex when the object was not > initialized yet. > > Note that the check must implicitly be safe concerning a concurrent frame > deregistration, as trying to deregister a frame that is on the unwinding path > of a concurrent exception is inherently racy. > > libgcc/ChangeLog: > * unwind-dw2-fde.c: Initialize fde object lazily when > the first exception tries to pass through. 
> --- > libgcc/unwind-dw2-fde.c | 52 ++++++++++++++++++++++++++++++++----- > ---- > 1 file changed, 41 insertions(+), 11 deletions(-) > > diff --git a/libgcc/unwind-dw2-fde.c b/libgcc/unwind-dw2-fde.c index > 3c0cc654ec0..6f69c20ff4b 100644 > --- a/libgcc/unwind-dw2-fde.c > +++ b/libgcc/unwind-dw2-fde.c > @@ -63,8 +63,6 @@ release_registered_frames (void) > > static void > get_pc_range (const struct object *ob, uintptr_type *range); -static void - > init_object (struct object *ob); > > #else > /* Without fast path frame deregistration must always succeed. */ @@ - > 76,6 +74,7 @@ static const int in_shutdown = 0; > by decreasing value of pc_begin. */ > static struct object *unseen_objects; > static struct object *seen_objects; > +#endif > > #ifdef __GTHREAD_MUTEX_INIT > static __gthread_mutex_t object_mutex = __GTHREAD_MUTEX_INIT; @@ > -103,7 +102,6 @@ init_object_mutex_once (void) > static __gthread_mutex_t object_mutex; > #endif > #endif > -#endif > > /* Called from crtbegin.o to register the unwind info for an object. 
*/ > > @@ -126,10 +124,7 @@ __register_frame_info_bases (const void *begin, > struct object *ob, > #endif > > #ifdef ATOMIC_FDE_FAST_PATH > - // Initialize eagerly to avoid locking later > - init_object (ob); > - > - // And register the frame > + // Register the frame in the b-tree > uintptr_type range[2]; > get_pc_range (ob, range); > btree_insert (®istered_frames, range[0], range[1] - range[0], ob); @@ > -180,10 +175,7 @@ __register_frame_info_table_bases (void *begin, struct > object *ob, > ob->s.b.encoding = DW_EH_PE_omit; > > #ifdef ATOMIC_FDE_FAST_PATH > - // Initialize eagerly to avoid locking later > - init_object (ob); > - > - // And register the frame > + // Register the frame in the b-tree > uintptr_type range[2]; > get_pc_range (ob, range); > btree_insert (®istered_frames, range[0], range[1] - range[0], ob); @@ > -892,7 +884,15 @@ init_object (struct object* ob) > accu.linear->orig_data = ob->u.single; > ob->u.sort = accu.linear; > > +#ifdef ATOMIC_FDE_FAST_PATH > + // We must update the sorted bit with an atomic operation > + struct object tmp; > + tmp.s.b = ob->s.b; > + tmp.s.b.sorted = 1; > + __atomic_store (&(ob->s.b), &(tmp.s.b), __ATOMIC_SEQ_CST); #else > ob->s.b.sorted = 1; > +#endif > } > > #ifdef ATOMIC_FDE_FAST_PATH > @@ -1130,6 +1130,21 @@ search_object (struct object* ob, void *pc) > } > } > > +#ifdef ATOMIC_FDE_FAST_PATH > + > +// Check if the object was already initialized static inline bool > +is_object_initialized (struct object *ob) { > + // We have to use relaxed atomics for the read, which > + // is a bit involved as we read from a bitfield > + struct object tmp; > + __atomic_load (&(ob->s.b), &(tmp.s.b), __ATOMIC_RELAXED); > + return tmp.s.b.sorted; > +} > + > +#endif > + > const fde * > _Unwind_Find_FDE (void *pc, struct dwarf_eh_bases *bases) > { > @@ -1141,6 +1156,21 @@ _Unwind_Find_FDE (void *pc, struct > dwarf_eh_bases *bases) > if (!ob) > return NULL; > > + // Initialize the object lazily > + if (!is_object_initialized (ob)) > 
+ { > + // Check again under mutex > + init_object_mutex_once (); > + __gthread_mutex_lock (&object_mutex); > + > + if (!ob->s.b.sorted) > + { > + init_object (ob); > + } > + > + __gthread_mutex_unlock (&object_mutex); > + } > + > f = search_object (ob, pc); > #else > > -- > 2.37.2