On Fri, Apr 11, 2025 at 6:10 PM Andi Kleen <a...@gcc.gnu.org> wrote:
>
> Right now ggc has a single free list for multiple sizes.  In some cases
> the list can get mixed by orders, and then the allocator may spend a lot
> of time walking the free list to find the right sizes.
>
> This patch splits the free list into multiple free lists by order,
> which allows O(1) access in most cases.
>
> It also has a fallback list for sizes not in the free lists
> (that seems to be needed for now).
>
> For the PR119387 test case it gives a significant speedup,
> both with and without debug information.
>
> A potential drawback might be some more fragmentation in the memory
> map, so there is a risk that very large compilations may run into
> the vma limit on Linux, or have slightly less coverage with
> large pages.
>
> For the PR119387 test case, which has extra memory overhead with -ggdb:
>
> ../obj-fast/gcc/cc1plus-allocpage -std=gnu++20 -O2 pr119387.cc -quiet ran
>     1.04 ± 0.01 times faster than ../obj-fast/gcc/cc1plus -std=gnu++20 -O2 pr119387.cc -quiet
>     2.63 ± 0.01 times faster than ../obj-fast/gcc/cc1plus-allocpage -std=gnu++20 -O2 pr119387.cc -quiet -ggdb
>     2.78 ± 0.01 times faster than ../obj-fast/gcc/cc1plus -std=gnu++20 -O2 pr119387.cc -quiet -ggdb
>
> It might also help for other test cases creating a lot of garbage.
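For readers skimming the thread, the core idea can be sketched in isolation roughly as follows.  This is a simplified illustration only, not the ggc-page.cc code; the names bucket, lookup_bucket, lookup_bucket_for_order, NUM_BUCKETS and MAX_ORDERS are invented for the example.  Keep one free list per observed page size, claim an empty slot the first time a new size shows up, fall back to a shared slot 0 once all slots are taken, and cache the chosen slot per allocation order so the common path never rescans:

/* Illustrative sketch only -- not the ggc-page.cc implementation.  */
#include <stddef.h>
#include <stdio.h>

struct page;                        /* a free backing page of some size */

struct bucket
{
  size_t bytes = 0;                 /* page size served here (0 = unused slot) */
  page *free_pages = nullptr;
};

constexpr int NUM_BUCKETS = 8;      /* slot 0 doubles as the shared fallback */
constexpr int MAX_ORDERS = 16;      /* one allocation order per power of two */

static bucket buckets[NUM_BUCKETS];
static bucket *order_cache[MAX_ORDERS];  /* order -> bucket, filled lazily */

/* Linear scan over a handful of buckets; claims an empty slot the first
   time a new size shows up, falls back to bucket 0 when all are taken.  */
static bucket *
lookup_bucket (size_t entry_size)
{
  for (int i = 1; i < NUM_BUCKETS; i++)
    {
      if (buckets[i].bytes == entry_size)
        return &buckets[i];
      if (buckets[i].bytes == 0)
        {
          buckets[i].bytes = entry_size;
          return &buckets[i];
        }
    }
  return &buckets[0];               /* fallback: may hold mixed sizes */
}

/* The per-order cache is what gives O(1) on the common path: after the
   first allocation of a given order the scan above never runs again.  */
static bucket *
lookup_bucket_for_order (unsigned order, size_t entry_size)
{
  if (order_cache[order] == nullptr)
    order_cache[order] = lookup_bucket (entry_size);
  return order_cache[order];
}

int
main ()
{
  bucket *b1 = lookup_bucket_for_order (3, 4096);
  bucket *b2 = lookup_bucket_for_order (3, 4096);
  printf ("same bucket reused: %s\n", b1 == b2 ? "yes" : "no");
  return 0;
}

Because each list then holds pages of a single size, a request never walks past entries of the wrong size, which is where the single mixed list degraded.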
I assume this passed bootstrap & regtest?

This is OK for trunk after we've released GCC 15.1.

Thanks,
Richard.

> gcc/ChangeLog:
>
>         PR middle-end/114563
>         PR c++/119387
>         * ggc-page.cc (struct free_list): New structure.
>         (struct page_entry): Point to free_list.
>         (find_free_list): New function.
>         (find_free_list_order): Ditto.
>         (alloc_page): Use specific free_list.
>         (release_pages): Ditto.
>         (do_release_pages): Ditto.
>         (init_ggc): Ditto.
>         (ggc_print_statistics): Print overflow stat.
>         (ggc_pch_read): Use specific free list.
> ---
>  gcc/ggc-page.cc | 110 ++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 92 insertions(+), 18 deletions(-)
>
> diff --git a/gcc/ggc-page.cc b/gcc/ggc-page.cc
> index 971b4334b7c2..0f674c0d6d7c 100644
> --- a/gcc/ggc-page.cc
> +++ b/gcc/ggc-page.cc
> @@ -234,6 +234,8 @@ static struct
>  }
>  inverse_table[NUM_ORDERS];
>
> +struct free_list;
> +
>  /* A page_entry records the status of an allocation page.  This
>     structure is dynamically sized to fit the bitmap in_use_p.  */
>  struct page_entry
> @@ -251,6 +253,9 @@ struct page_entry
>       of the host system page size.)  */
>    size_t bytes;
>
> +  /* Free list of this page size.  */
> +  struct free_list *free_list;
> +
>    /* The address at which the memory is allocated.  */
>    char *page;
>
> @@ -368,6 +373,15 @@ struct free_object
>  };
>  #endif
>
> +constexpr int num_free_list = 8;
> +
> +/* A free_list for pages with BYTES size.  */
> +struct free_list
> +{
> +  size_t bytes;
> +  page_entry *free_pages;
> +};
> +
>  /* The rest of the global variables.  */
>  static struct ggc_globals
>  {
> @@ -412,8 +426,8 @@ static struct ggc_globals
>    int dev_zero_fd;
>  #endif
>
> -  /* A cache of free system pages.  */
> -  page_entry *free_pages;
> +  /* A cache of free system pages.  Entry 0 is fallback.  */
> +  struct free_list free_lists[num_free_list];
>
>  #ifdef USING_MALLOC_PAGE_GROUPS
>    page_group *page_groups;
> @@ -488,6 +502,9 @@ static struct ggc_globals
>
>      /* The overhead for each of the allocation orders.  */
>      unsigned long long total_overhead_per_order[NUM_ORDERS];
> +
> +    /* Number of fallbacks.  */
> +    unsigned long long fallback;
>    } stats;
>  } G;
>
> @@ -754,6 +771,42 @@ clear_page_group_in_use (page_group *group, char *page)
>  }
>  #endif
>
> +/* Find a free list for ENTRY_SIZE.  */
> +
> +static inline struct free_list *
> +find_free_list (size_t entry_size)
> +{
> +  int i;
> +  for (i = 1; i < num_free_list; i++)
> +    {
> +      if (G.free_lists[i].bytes == entry_size)
> +        return &G.free_lists[i];
> +      if (G.free_lists[i].bytes == 0)
> +        {
> +          G.free_lists[i].bytes = entry_size;
> +          return &G.free_lists[i];
> +        }
> +    }
> +  /* Fallback.  Does this happen?  */
> +  if (GATHER_STATISTICS)
> +    G.stats.fallback++;
> +  return &G.free_lists[0];
> +}
> +
> +/* Fast lookup of free_list by order.  */
> +
> +static struct free_list *cache_free_list[NUM_ORDERS];
> +
> +/* Faster way to find a free list by ORDER for BYTES.  */
> +
> +static inline struct free_list *
> +find_free_list_order (unsigned order, size_t bytes)
> +{
> +  if (cache_free_list[order] == NULL)
> +    cache_free_list[order] = find_free_list (bytes);
> +  return cache_free_list[order];
> +}
> +
>  /* Allocate a new page for allocating objects of size 2^ORDER,
>     and return an entry for it.  The entry is not added to the
>     appropriate page_table list.  */
> @@ -770,6 +823,7 @@ alloc_page (unsigned order)
>  #ifdef USING_MALLOC_PAGE_GROUPS
>    page_group *group;
>  #endif
> +  struct free_list *free_list;
>
>    num_objects = OBJECTS_PER_PAGE (order);
>    bitmap_size = BITMAP_SIZE (num_objects + 1);
> @@ -782,8 +836,10 @@ alloc_page (unsigned order)
>    entry = NULL;
>    page = NULL;
>
> +  free_list = find_free_list_order (order, entry_size);
> +
>    /* Check the list of free pages for one we can use.  */
> -  for (pp = &G.free_pages, p = *pp; p; pp = &p->next, p = *pp)
> +  for (pp = &free_list->free_pages, p = *pp; p; pp = &p->next, p = *pp)
>      if (p->bytes == entry_size)
>        break;
>
> @@ -816,7 +872,7 @@ alloc_page (unsigned order)
>        /* We want just one page.  Allocate a bunch of them and put the
>           extras on the freelist.  (Can only do this optimization with
>           mmap for backing store.)  */
> -      struct page_entry *e, *f = G.free_pages;
> +      struct page_entry *e, *f = free_list->free_pages;
>        int i, entries = GGC_QUIRE_SIZE;
>
>        page = alloc_anon (NULL, G.pagesize * GGC_QUIRE_SIZE, false);
> @@ -833,12 +889,13 @@ alloc_page (unsigned order)
>            e = XCNEWVAR (struct page_entry, page_entry_size);
>            e->order = order;
>            e->bytes = G.pagesize;
> +          e->free_list = free_list;
>            e->page = page + (i << G.lg_pagesize);
>            e->next = f;
>            f = e;
>          }
>
> -      G.free_pages = f;
> +      free_list->free_pages = f;
>      }
>    else
>      page = alloc_anon (NULL, entry_size, true);
> @@ -904,12 +961,13 @@ alloc_page (unsigned order)
>            e = XCNEWVAR (struct page_entry, page_entry_size);
>            e->order = order;
>            e->bytes = G.pagesize;
> +          e->free_list = free_list;
>            e->page = a;
>            e->group = group;
>            e->next = f;
>            f = e;
>          }
> -      G.free_pages = f;
> +      free_list->free_pages = f;
>      }
>  }
>  #endif
> @@ -918,6 +976,7 @@ alloc_page (unsigned order)
>    entry = XCNEWVAR (struct page_entry, page_entry_size);
>
>    entry->bytes = entry_size;
> +  entry->free_list = free_list;
>    entry->page = page;
>    entry->context_depth = G.context_depth;
>    entry->order = order;
> @@ -1006,14 +1065,15 @@ free_page (page_entry *entry)
>
>    adjust_depth ();
>
> -  entry->next = G.free_pages;
> -  G.free_pages = entry;
> +  struct free_list *free_list = entry->free_list;
> +  entry->next = free_list->free_pages;
> +  free_list->free_pages = entry;
>  }
>
> -/* Release the free page cache to the system.  */
> +/* Release the free page cache for FREE_LIST to the system.  */
>
>  static void
> -release_pages (void)
> +do_release_pages (struct free_list *free_list)
>  {
>    size_t n1 = 0;
>    size_t n2 = 0;
> @@ -1031,7 +1091,7 @@ release_pages (void)
>       This does not always work because the free_pages list is only
>       approximately sorted.  */
>
> -  p = G.free_pages;
> +  p = free_list->free_pages;
>    prev = NULL;
>    while (p)
>      {
> @@ -1060,7 +1120,7 @@ release_pages (void)
>            if (prev)
>              prev->next = p;
>            else
> -            G.free_pages = p;
> +            free_list->free_pages = p;
>            G.bytes_mapped -= mapped_len;
>            n1 += len;
>            continue;
> @@ -1071,7 +1131,7 @@ release_pages (void)
>    /* Now give back the fragmented pages to the OS, but keep the address
>       space to reuse it next time.  */
>
> -  for (p = G.free_pages; p; )
> +  for (p = free_list->free_pages; p; )
>      {
>        if (p->discarded)
>          {
> @@ -1108,7 +1168,7 @@ release_pages (void)
>    size_t len;
>
>    /* Gather up adjacent pages so they are unmapped together.  */
> -  p = G.free_pages;
> +  p = free_list->free_pages;
>
>    while (p)
>      {
> @@ -1131,14 +1191,14 @@ release_pages (void)
>        G.bytes_mapped -= len;
>      }
>
> -  G.free_pages = NULL;
> +  free_list->free_pages = NULL;
>  #endif
>  #ifdef USING_MALLOC_PAGE_GROUPS
>    page_entry **pp, *p;
>    page_group **gp, *g;
>
>    /* Remove all pages from free page groups from the list.  */
> -  pp = &G.free_pages;
> +  pp = &free_list->free_pages;
>    while ((p = *pp) != NULL)
>      if (p->group->in_use == 0)
>        {
> @@ -1172,6 +1232,15 @@ release_pages (void)
>      }
>  }
>
> +/* Release the free page cache to the system.  */
> +
> +static void
> +release_pages ()
> +{
> +  for (int i = 0; i < num_free_list; i++)
> +    do_release_pages (&G.free_lists[i]);
> +}
> +
>  /* This table provides a fast way to determine ceil(log_2(size)) for
>     allocation requests.  The minimum allocation size is eight bytes.  */
>  #define NUM_SIZE_LOOKUP 512
> @@ -1801,9 +1870,10 @@ init_ggc (void)
>        /* We have a good page, might as well hold onto it...  */
>        e = XCNEW (struct page_entry);
>        e->bytes = G.pagesize;
> +      e->free_list = find_free_list (G.pagesize);
>        e->page = p;
> -      e->next = G.free_pages;
> -      G.free_pages = e;
> +      e->next = e->free_list->free_pages;
> +      e->free_list->free_pages = e;
>      }
>  #endif
>
> @@ -2404,6 +2474,9 @@ ggc_print_statistics (void)
>    fprintf (stderr, "Total Allocated under 128B: "
>             PRsa (9) "\n",
>             SIZE_AMOUNT (G.stats.total_allocated_under128));
> +  fprintf (stderr, "Number of free list fallbacks: "
> +           PRsa (9) "\n",
> +           SIZE_AMOUNT (G.stats.fallback));
>
>    for (i = 0; i < NUM_ORDERS; i++)
>      if (G.stats.total_allocated_per_order[i])
> @@ -2677,6 +2750,7 @@ ggc_pch_read (FILE *f, void *addr)
>                                           - sizeof (long)
>                                           + BITMAP_SIZE (num_objs + 1)));
>        entry->bytes = bytes;
> +      entry->free_list = find_free_list (bytes);
>        entry->page = offs;
>        entry->context_depth = 0;
>        offs += bytes;
> --
> 2.49.0
>
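A small aside on the release path for anyone mapping the hunks above to the behaviour: since the page cache is now split across per-size lists, releasing it back to the OS has to visit every list rather than one global chain, which is what the new release_pages ()/do_release_pages () pair does.  A minimal standalone sketch of that shape, again with invented names and the actual munmap/madvise work elided:

/* Sketch only: release now walks each per-size bucket instead of one
   global list.  Not the ggc-page.cc code.  */
#include <stddef.h>

struct page { page *next; };

struct bucket
{
  size_t bytes;
  page *free_pages;
};

constexpr int NUM_BUCKETS = 8;
static bucket buckets[NUM_BUCKETS];

/* Return one bucket's cached pages to the OS; the real allocator would
   munmap or madvise the backing memory here.  */
static void
release_bucket (bucket *b)
{
  b->free_pages = nullptr;
}

/* Public entry point: visit every bucket in turn.  */
static void
release_all ()
{
  for (int i = 0; i < NUM_BUCKETS; i++)
    release_bucket (&buckets[i]);
}

int
main ()
{
  release_all ();
  return 0;
}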