On Tue, Apr 29, 2014 at 3:21 PM, Trevor Saunders <tsaund...@mozilla.com> wrote: > On Tue, Apr 29, 2014 at 02:58:11PM +0200, Richard Biener wrote: >> On Tue, Apr 29, 2014 at 1:08 PM, <tsaund...@mozilla.com> wrote: >> > From: Trevor Saunders <tsaund...@mozilla.com> >> > >> > Hi, >> > >> > This implements finalizers by keeping a list of registered finalizers >> > and after every mark but before sweeping check to see if any of them are >> > for unmarked blocks. >> > >> > bootstrapped + regtested on x86_64-unknown-linux-gnu, ok? >> > >> > Trev >> > >> > gcc/ChangeLog: >> > >> > * ggc-common.c (ggc_internal_cleared_alloc): Adjust. >> > * ggc-none.c (ggc_internal_alloc): Assert if a finalizer is passed. >> > (ggc_internal_cleared_alloc): Likewise. >> > * ggc-page.c (finalizer): New class. >> > (globals::finalizer_lists): New member. >> > (ggc_internal_alloc): Record the finalizer if any for the block >> > being >> > allocated. >> > (ggc_handle_finalizers): New function. >> > (ggc_collect): Call ggc_handle_finalizers. >> > * ggc.h (ggc_internal_alloc):Add arguments to allow installing a >> > finalizer. >> > (ggc_internal_cleared_alloc): Likewise. >> > (finalize): New function. >> > (need_finalization_p): Likewise. >> > (ggc_alloc): Install the type's destructor as the finalizer if it >> > might do something. >> > (ggc_cleared_alloc): Likewise. >> > (ggc_vec_alloc): Likewise. >> > (ggc_cleared_vec_alloc): Likewise. >> > --- >> > gcc/ggc-common.c | 5 +++-- >> > gcc/ggc-none.c | 8 ++++++-- >> > gcc/ggc-page.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- >> > gcc/ggc.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++------ >> > 4 files changed, 104 insertions(+), 11 deletions(-) >> > >> > diff --git a/gcc/ggc-common.c b/gcc/ggc-common.c >> > index e89cc64..b11a10c 100644 >> > --- a/gcc/ggc-common.c >> > +++ b/gcc/ggc-common.c >> > @@ -174,9 +174,10 @@ ggc_mark_roots (void) >> > >> > /* Allocate a block of memory, then clear it. */ >> > void * >> > -ggc_internal_cleared_alloc (size_t size MEM_STAT_DECL) >> > +ggc_internal_cleared_alloc (size_t size, void (*f)(void *), size_t s, >> > size_t n >> > + MEM_STAT_DECL) >> > { >> > - void *buf = ggc_internal_alloc (size PASS_MEM_STAT); >> > + void *buf = ggc_internal_alloc (size, f, s, n PASS_MEM_STAT); >> > memset (buf, 0, size); >> > return buf; >> > } >> > diff --git a/gcc/ggc-none.c b/gcc/ggc-none.c >> > index aad89bf..97d3566 100644 >> > --- a/gcc/ggc-none.c >> > +++ b/gcc/ggc-none.c >> > @@ -41,14 +41,18 @@ ggc_round_alloc_size (size_t requested_size) >> > } >> > >> > void * >> > -ggc_internal_alloc (size_t size MEM_STAT_DECL) >> > +ggc_internal_alloc (size_t size, void (*f)(void *), size_t, size_t >> > + MEM_STAT_DECL) >> > { >> > + gcc_assert (!f); // ggc-none doesn't support finalizers >> > return xmalloc (size); >> > } >> > >> > void * >> > -ggc_internal_cleared_alloc (size_t size MEM_STAT_DECL) >> > +ggc_internal_cleared_alloc (size_t size, void (*f)(void *), size_t, size_t >> > + MEM_STAT_DECL) >> > { >> > + gcc_assert (!f); // ggc-none doesn't support finalizers >> > return xcalloc (size, 1); >> > } >> > >> > diff --git a/gcc/ggc-page.c b/gcc/ggc-page.c >> > index ae5e88a..208b526 100644 >> > --- a/gcc/ggc-page.c >> > +++ b/gcc/ggc-page.c >> > @@ -332,6 +332,27 @@ typedef struct page_table_chain >> > >> > #endif >> > >> > +class finalizer >> > +{ >> > +public: >> > + finalizer (uintptr_t addr, void (*f)(void *), size_t s, size_t n) : >> > + m_addr (addr), m_function (f), m_object_size (s), m_n_objects (n) {} >> > + >> > + void call () const >> > + { >> > + for (size_t i = 0; i < m_n_objects; i++) >> > + m_function (reinterpret_cast<void *> (m_addr + (i * >> > m_object_size))); >> > + } >> > + >> > + uintptr_t addr () const { return m_addr; } >> > + >> > +private: >> > + uintptr_t m_addr; >> > + void (*m_function)(void *); >> > + size_t m_object_size; >> > + size_t m_n_objects; >> > +}; >> > + >> > #ifdef ENABLE_GC_ALWAYS_COLLECT >> > /* List of free objects to be verified as actually free on the >> > next collection. */ >> > @@ -425,6 +446,9 @@ static struct globals >> > better runtime data access pattern. */ >> > unsigned long **save_in_use; >> > >> > + /* finalizer_lists[i] is the list of finalizers for blocks of order i. >> > */ >> > + vec<finalizer> finalizer_lists[NUM_ORDERS]; >> > + >> > #ifdef ENABLE_GC_ALWAYS_COLLECT >> > /* List of free objects to be verified as actually free on the >> > next collection. */ >> > @@ -1202,7 +1226,8 @@ ggc_round_alloc_size (size_t requested_size) >> > /* Allocate a chunk of memory of SIZE bytes. Its contents are undefined. >> > */ >> > >> > void * >> > -ggc_internal_alloc (size_t size MEM_STAT_DECL) >> > +ggc_internal_alloc (size_t size, void (*f)(void *), size_t s, size_t n >> > + MEM_STAT_DECL) >> > { >> > size_t order, word, bit, object_offset, object_size; >> > struct page_entry *entry; >> > @@ -1345,6 +1370,10 @@ ggc_internal_alloc (size_t size MEM_STAT_DECL) >> > /* For timevar statistics. */ >> > timevar_ggc_mem_total += object_size; >> > >> > + if (f) >> > + G.finalizer_lists[order] >> > + .safe_push (finalizer (reinterpret_cast<uintptr_t> (result), f, s, >> > n)); >> > + >> > if (GATHER_STATISTICS) >> > { >> > size_t overhead = object_size - size; >> > @@ -1811,6 +1840,24 @@ clear_marks (void) >> > } >> > } >> > >> > +static void >> > +ggc_handle_finalizers () >> > +{ >> > + for (unsigned int order = 2; order < NUM_ORDERS; order++) >> > + { >> > + unsigned int length = G.finalizer_lists[order].length (); >> > + for (unsigned int i = length - 1; i < length; i--) >> >> I suppose you are walking backwards here to handle dependences >> properly. > > I only did it because it seemed epsilon easier than iterating forward > and dealing with removals from the list.
Heh, but iterating forward is as easy as unsigned keep = 0, pick = 0; for (; pick < length; ++pick) { if (!ggc_marked (...)) { ... } else if (pick != keep) G.finalizer_list[order][keep++] = G.finalizer_list[order][pick]; } G.finalizer_list[oder].truncate (keep); either as 2nd loop doing the removal (with empty ggc_marked () case) or by doing the walk forward in the first place. With a single vector we can then impose a "reasonable" order of destruction. Or, as you say, we can also use unordered remove which should be even more efficient as it avoids all copying. > I suppose dependancies between objects could be a reason to do it > backwards, but I tend to think getting dependancies of that sort right > is impossible and you shouldn't rely on any given ordering. So just > write itempotent dtors and all's good ;) > >> But it doesn't address dependencies across different allocation >> orders (what's the reason to even have one vector per order?). > > I think I did it for locality, but otherwise I don't think there's a > great reason other than copying what other stuff does with orders. Hmm, I'd reduce it to a single vector (otherwise you'd eventually even want a vector per GC page). >> > + { >> > + finalizer &f = G.finalizer_lists[order][i]; >> > + if (!ggc_marked_p (reinterpret_cast<void *> (f.addr ()))) >> > + { >> > + f.call (); >> > + G.finalizer_lists[order].ordered_remove (i); >> >> But this clearly will be an efficiency issue. Can you delay >> removing the element from the list and instead do it in >> a second forward run over all finalizers of that order (if there >> were any finalizers run in the first)? > > Or if we agree to not care about order we can just use the non ordered > remove. Right. With a single vector and backward walk I can't think of a case that we could break though? Of course better make sure any dependence on order will break immediately (and thus finalize in the "wrong" forward order). >> I suppose that the overall complexity of walking the finalizers isn't >> that bad as we walked all reachable ggc memory anyway. > > yeah, its bad but not as bad as gc itself is my thought. > >> So the only issue would be memory inefficiencies for a lot >> of small individual objects that need finalization. Thus, >> >> > +private: >> > + uintptr_t m_addr; >> > + void (*m_function)(void *); >> > + size_t m_object_size; >> > + size_t m_n_objects; >> >> could be improved by noting that m_function and m_object_size >> are somewhat redundant (and m_object_size and m_n_objects >> are unused for non-vectors). So, instead of m_function and >> m_object_size you could store an index into a global vector >> of m_function/m_object_size pairs (eventually even statically >> compute it by gengtype.c to avoid some kind of lookup >> at allocation time). > > deciding if a type needs finalization would mean teaching gengtype about > a lot of C++ :/ Well actually maybe not, if we don't maybe we could > just have gengtype emit a blob of C++ that does it if we're really > clever, but that would take some thought. Yeah, I think we can improve on this with a followup if it turns out necessary. >> As finalization order is currently broken (see above) you could >> also have two vectors of finalizers, one for the vector case >> and one for the non-vector case. > > that certainly seems doable. Or go with this in the mean time. Actually that would be my preference - iterate forward (and document dependences are not handled) using unordered removes and have overall two vectors. >> An optimization would also be to look at the last added >> finalizer and see if the current object is adjacent to it >> and only adjust m_n_objects. (not sure if that would trigger >> often) > > What happens if they aren't freed at the same time though? Good question. Ok, so lets not do this. >> I suppose given we don't have many uses of finalizers (yet) >> this all isn't much of an issue. Still please eventually >> reduce to a single finalizer vector and avoid the ordered remove. > > both of those certainly seem doable. Thanks, Richard. > thanks! > > Trev > >> >> Thanks, >> Richard. >> >> > + } >> > + } >> > + } >> > +} >> > + >> > /* Free all empty pages. Partially empty pages need no attention >> > because the `mark' bit doubles as an `unused' bit. */ >> > >> > @@ -2075,6 +2122,7 @@ ggc_collect (void) >> > >> > clear_marks (); >> > ggc_mark_roots (); >> > + ggc_handle_finalizers (); >> > >> > if (GATHER_STATISTICS) >> > ggc_prune_overhead_list (); >> > diff --git a/gcc/ggc.h b/gcc/ggc.h >> > index 50fb199..4c59597 100644 >> > --- a/gcc/ggc.h >> > +++ b/gcc/ggc.h >> > @@ -136,13 +136,16 @@ extern void gt_pch_save (FILE *f); >> > /* Allocation. */ >> > >> > /* The internal primitive. */ >> > -extern void *ggc_internal_alloc (size_t CXX_MEM_STAT_INFO) >> > ATTRIBUTE_MALLOC; >> > +extern void *ggc_internal_alloc (size_t, void (*)(void *) = NULL, size_t >> > = 0, >> > + size_t = 1 CXX_MEM_STAT_INFO) >> > + ATTRIBUTE_MALLOC; >> > >> > extern size_t ggc_round_alloc_size (size_t requested_size); >> > >> > /* Allocates cleared memory. */ >> > -extern void *ggc_internal_cleared_alloc (size_t CXX_MEM_STAT_INFO) >> > - ATTRIBUTE_MALLOC; >> > +extern void *ggc_internal_cleared_alloc (size_t, void (*)(void *) = NULL, >> > + size_t = 0, size_t = 1 >> > + CXX_MEM_STAT_INFO) >> > ATTRIBUTE_MALLOC; >> > >> > /* Resize a block. */ >> > extern void *ggc_realloc (void *, size_t CXX_MEM_STAT_INFO); >> > @@ -157,24 +160,55 @@ extern void dump_ggc_loc_statistics (bool); >> > ((T *) ggc_realloc ((P), (N) * sizeof (T) MEM_STAT_INFO)) >> > >> > template<typename T> >> > +void >> > +finalize (void *p) >> > +{ >> > + static_cast<T *> (p)->~T (); >> > +} >> > + >> > +template<typename T> >> > +static inline bool >> > +need_finalization_p () >> > +{ >> > +#if GCC_VERSION >= 4003 >> > + return !__has_trivial_destructor (T); >> > +#else >> > + return true; >> > +#endif >> > +} >> > + >> > +template<typename T> >> > static inline T * >> > ggc_alloc (ALONE_CXX_MEM_STAT_INFO) >> > { >> > - return static_cast<T *> (ggc_internal_alloc (sizeof (T) PASS_MEM_STAT)); >> > + if (need_finalization_p<T> ()) >> > + return static_cast<T *> (ggc_internal_alloc (sizeof (T), finalize<T> >> > + PASS_MEM_STAT)); >> > + else >> > + return static_cast<T *> (ggc_internal_alloc (sizeof (T) >> > PASS_MEM_STAT)); >> > } >> > >> > template<typename T> >> > static inline T * >> > ggc_cleared_alloc (ALONE_CXX_MEM_STAT_INFO) >> > { >> > - return static_cast<T *> (ggc_internal_cleared_alloc (sizeof (T) >> > - PASS_MEM_STAT)); >> > + if (need_finalization_p<T> ()) >> > + return static_cast<T *> (ggc_internal_cleared_alloc (sizeof (T), >> > + finalize<T> >> > + PASS_MEM_STAT)); >> > + else >> > + return static_cast<T *> (ggc_internal_cleared_alloc (sizeof (T) >> > + PASS_MEM_STAT)); >> > } >> > >> > template<typename T> >> > static inline T * >> > ggc_vec_alloc (size_t c CXX_MEM_STAT_INFO) >> > { >> > + if (need_finalization_p<T> ()) >> > + return static_cast<T *> (ggc_internal_alloc (c * sizeof (T), >> > finalize<T>, >> > + sizeof (T), c >> > PASS_MEM_STAT)); >> > + else >> > return static_cast<T *> (ggc_internal_alloc (c * sizeof (T) >> > PASS_MEM_STAT)); >> > } >> > @@ -183,6 +217,12 @@ template<typename T> >> > static inline T * >> > ggc_cleared_vec_alloc (size_t c CXX_MEM_STAT_INFO) >> > { >> > + if (need_finalization_p<T> ()) >> > + return static_cast<T *> (ggc_internal_cleared_alloc (c * sizeof (T), >> > + finalize<T>, >> > + sizeof (T), c >> > + PASS_MEM_STAT)); >> > + else >> > return static_cast<T *> (ggc_internal_cleared_alloc (c * sizeof (T) >> > PASS_MEM_STAT)); >> > } >> > -- >> > 2.0.0.rc0 >> >