Right now ggc uses a single free list for pages of multiple sizes. The
list can become mixed across orders, and the allocator may then spend a
lot of time walking it to find a page of the right size.

This patch splits the free list into multiple free lists by order,
which allows O(1) access in most cases.

It also keeps a fallback list for sizes that do not fit into the
per-order lists (that still seems to be needed for now).
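
As a rough illustration, the scheme amounts to a small fixed array of free
lists, where each slot gets bound to one page size on first use and slot 0
serves as the fallback. Below is a minimal, self-contained sketch with
hypothetical names (size_free_list, lookup_free_list), not the actual
ggc-page.cc declarations; the real patch additionally caches the result
per allocation order so the loop is skipped entirely in the common case:

    #include <cstddef>

    struct page;                        /* stand-in for ggc's page_entry */

    /* One free list, bound to a single page size once it is first used.  */
    struct size_free_list
    {
      size_t bytes;                     /* page size served; 0 = slot unused */
      page *pages;                      /* singly linked list of free pages */
    };

    static const int n_lists = 8;
    static size_free_list lists[n_lists];   /* slot 0 is the fallback list */

    /* Return the free list responsible for pages of BYTES bytes.  */
    static size_free_list *
    lookup_free_list (size_t bytes)
    {
      for (int i = 1; i < n_lists; i++)
        {
          if (lists[i].bytes == bytes)
            return &lists[i];           /* slot already bound to this size */
          if (lists[i].bytes == 0)
            {
              lists[i].bytes = bytes;   /* claim an empty slot for this size */
              return &lists[i];
            }
        }
      return &lists[0];                 /* too many distinct sizes: fall back */
    }

Since the loop is bounded by the small number of slots, and the per-order
cache avoids it after the first allocation of a given order, the common
path no longer walks a long list of mixed page sizes.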

For the PR119387 test case it gives a significant speedup,
both with and without debug information.

A potential drawback is somewhat more fragmentation of the memory map,
so there is a risk that very large compilations may run into the VMA
limit on Linux, or get slightly less coverage from large pages.

For the PR119387 test case, which has extra memory overhead with -ggdb:

      ../obj-fast/gcc/cc1plus-allocpage -std=gnu++20 -O2 pr119387.cc -quiet
    ran 1.04 ± 0.01 times faster than ../obj-fast/gcc/cc1plus -std=gnu++20 -O2 pr119387.cc -quiet
        2.63 ± 0.01 times faster than ../obj-fast/gcc/cc1plus-allocpage -std=gnu++20 -O2 pr119387.cc -quiet -ggdb
        2.78 ± 0.01 times faster than ../obj-fast/gcc/cc1plus -std=gnu++20 -O2 pr119387.cc -quiet -ggdb

It might also help other test cases that create a lot of garbage.

gcc/ChangeLog:

        PR middle-end/114563
        PR c++/119387
        * ggc-page.cc (struct free_list): New structure.
        (struct page_entry): Point to free_list.
        (find_free_list): New function.
        (find_free_list_order): Ditto.
        (alloc_page): Use specific free_list.
        (release_pages): Ditto.
        (do_release_pages): Ditto.
        (init_ggc): Ditto.
        (ggc_print_statistics): Print overflow stat.
        (ggc_pch_read): Use specific free list.
---
 gcc/ggc-page.cc | 110 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 92 insertions(+), 18 deletions(-)

diff --git a/gcc/ggc-page.cc b/gcc/ggc-page.cc
index 971b4334b7c2..0f674c0d6d7c 100644
--- a/gcc/ggc-page.cc
+++ b/gcc/ggc-page.cc
@@ -234,6 +234,8 @@ static struct
 }
 inverse_table[NUM_ORDERS];
 
+struct free_list;
+
 /* A page_entry records the status of an allocation page.  This
    structure is dynamically sized to fit the bitmap in_use_p.  */
 struct page_entry
@@ -251,6 +253,9 @@ struct page_entry
      of the host system page size.)  */
   size_t bytes;
 
+  /* Free list of this page size.  */
+  struct free_list *free_list;
+
   /* The address at which the memory is allocated.  */
   char *page;
 
@@ -368,6 +373,15 @@ struct free_object
 };
 #endif
 
+constexpr int num_free_list = 8;
+
+/* A free_list for pages with BYTES size.  */
+struct free_list
+{
+  size_t bytes;
+  page_entry *free_pages;
+};
+
 /* The rest of the global variables.  */
 static struct ggc_globals
 {
@@ -412,8 +426,8 @@ static struct ggc_globals
   int dev_zero_fd;
 #endif
 
-  /* A cache of free system pages.  */
-  page_entry *free_pages;
+  /* A cache of free system pages. Entry 0 is fallback.  */
+  struct free_list free_lists[num_free_list];
 
 #ifdef USING_MALLOC_PAGE_GROUPS
   page_group *page_groups;
@@ -488,6 +502,9 @@ static struct ggc_globals
 
     /* The overhead for each of the allocation orders.  */
     unsigned long long total_overhead_per_order[NUM_ORDERS];
+
+    /* Number of fallbacks.  */
+    unsigned long long fallback;
   } stats;
 } G;
 
@@ -754,6 +771,42 @@ clear_page_group_in_use (page_group *group, char *page)
 }
 #endif
 
+/* Find a free list for ENTRY_SIZE.  */
+
+static inline struct free_list *
+find_free_list (size_t entry_size)
+{
+  int i;
+  for (i = 1; i < num_free_list; i++)
+    {
+      if (G.free_lists[i].bytes == entry_size)
+       return &G.free_lists[i];
+      if (G.free_lists[i].bytes == 0)
+       {
+         G.free_lists[i].bytes = entry_size;
+         return &G.free_lists[i];
+       }
+    }
+  /* Fallback. Does this happen?  */
+  if (GATHER_STATISTICS)
+    G.stats.fallback++;
+  return &G.free_lists[0];
+}
+
+/* Fast lookup of free_list by order.  */
+
+static struct free_list *cache_free_list[NUM_ORDERS];
+
+/* Faster way to find a free list by ORDER for BYTES.  */
+
+static inline struct free_list *
+find_free_list_order (unsigned order, size_t bytes)
+{
+  if (cache_free_list[order] == NULL)
+    cache_free_list[order] = find_free_list (bytes);
+  return cache_free_list[order];
+}
+
 /* Allocate a new page for allocating objects of size 2^ORDER,
    and return an entry for it.  The entry is not added to the
    appropriate page_table list.  */
@@ -770,6 +823,7 @@ alloc_page (unsigned order)
 #ifdef USING_MALLOC_PAGE_GROUPS
   page_group *group;
 #endif
+  struct free_list *free_list;
 
   num_objects = OBJECTS_PER_PAGE (order);
   bitmap_size = BITMAP_SIZE (num_objects + 1);
@@ -782,8 +836,10 @@ alloc_page (unsigned order)
   entry = NULL;
   page = NULL;
 
+  free_list = find_free_list_order (order, entry_size);
+
   /* Check the list of free pages for one we can use.  */
-  for (pp = &G.free_pages, p = *pp; p; pp = &p->next, p = *pp)
+  for (pp = &free_list->free_pages, p = *pp; p; pp = &p->next, p = *pp)
     if (p->bytes == entry_size)
       break;
 
@@ -816,7 +872,7 @@ alloc_page (unsigned order)
       /* We want just one page.  Allocate a bunch of them and put the
         extras on the freelist.  (Can only do this optimization with
         mmap for backing store.)  */
-      struct page_entry *e, *f = G.free_pages;
+      struct page_entry *e, *f = free_list->free_pages;
       int i, entries = GGC_QUIRE_SIZE;
 
       page = alloc_anon (NULL, G.pagesize * GGC_QUIRE_SIZE, false);
@@ -833,12 +889,13 @@ alloc_page (unsigned order)
          e = XCNEWVAR (struct page_entry, page_entry_size);
          e->order = order;
          e->bytes = G.pagesize;
+         e->free_list = free_list;
          e->page = page + (i << G.lg_pagesize);
          e->next = f;
          f = e;
        }
 
-      G.free_pages = f;
+      free_list->free_pages = f;
     }
   else
     page = alloc_anon (NULL, entry_size, true);
@@ -904,12 +961,13 @@ alloc_page (unsigned order)
              e = XCNEWVAR (struct page_entry, page_entry_size);
              e->order = order;
              e->bytes = G.pagesize;
+             e->free_list = free_list;
              e->page = a;
              e->group = group;
              e->next = f;
              f = e;
            }
-         G.free_pages = f;
+         free_list->free_pages = f;
        }
     }
 #endif
@@ -918,6 +976,7 @@ alloc_page (unsigned order)
     entry = XCNEWVAR (struct page_entry, page_entry_size);
 
   entry->bytes = entry_size;
+  entry->free_list = free_list;
   entry->page = page;
   entry->context_depth = G.context_depth;
   entry->order = order;
@@ -1006,14 +1065,15 @@ free_page (page_entry *entry)
 
   adjust_depth ();
 
-  entry->next = G.free_pages;
-  G.free_pages = entry;
+  struct free_list *free_list = entry->free_list;
+  entry->next = free_list->free_pages;
+  free_list->free_pages = entry;
 }
 
-/* Release the free page cache to the system.  */
+/* Release the free page cache for FREE_LIST to the system.  */
 
 static void
-release_pages (void)
+do_release_pages (struct free_list *free_list)
 {
   size_t n1 = 0;
   size_t n2 = 0;
@@ -1031,7 +1091,7 @@ release_pages (void)
      This does not always work because the free_pages list is only
      approximately sorted. */
 
-  p = G.free_pages;
+  p = free_list->free_pages;
   prev = NULL;
   while (p)
     {
@@ -1060,7 +1120,7 @@ release_pages (void)
          if (prev)
            prev->next = p;
           else
-            G.free_pages = p;
+           free_list->free_pages = p;
           G.bytes_mapped -= mapped_len;
          n1 += len;
          continue;
@@ -1071,7 +1131,7 @@ release_pages (void)
   /* Now give back the fragmented pages to the OS, but keep the address
      space to reuse it next time. */
 
-  for (p = G.free_pages; p; )
+  for (p = free_list->free_pages; p; )
     {
       if (p->discarded)
         {
@@ -1108,7 +1168,7 @@ release_pages (void)
   size_t len;
 
   /* Gather up adjacent pages so they are unmapped together.  */
-  p = G.free_pages;
+  p = free_list->free_pages;
 
   while (p)
     {
@@ -1131,14 +1191,14 @@ release_pages (void)
       G.bytes_mapped -= len;
     }
 
-  G.free_pages = NULL;
+  free_list->free_pages = NULL;
 #endif
 #ifdef USING_MALLOC_PAGE_GROUPS
   page_entry **pp, *p;
   page_group **gp, *g;
 
   /* Remove all pages from free page groups from the list.  */
-  pp = &G.free_pages;
+  pp = &free_list->free_pages;
   while ((p = *pp) != NULL)
     if (p->group->in_use == 0)
       {
@@ -1172,6 +1232,15 @@ release_pages (void)
     }
 }
 
+/* Release the free page cache to the system.  */
+
+static void
+release_pages ()
+{
+  for (int i = 0; i < num_free_list; i++)
+    do_release_pages (&G.free_lists[i]);
+}
+
 /* This table provides a fast way to determine ceil(log_2(size)) for
    allocation requests.  The minimum allocation size is eight bytes.  */
 #define NUM_SIZE_LOOKUP 512
@@ -1801,9 +1870,10 @@ init_ggc (void)
     /* We have a good page, might as well hold onto it...  */
     e = XCNEW (struct page_entry);
     e->bytes = G.pagesize;
+    e->free_list = find_free_list (G.pagesize);
     e->page = p;
-    e->next = G.free_pages;
-    G.free_pages = e;
+    e->next = e->free_list->free_pages;
+    e->free_list->free_pages = e;
   }
 #endif
 
@@ -2404,6 +2474,9 @@ ggc_print_statistics (void)
       fprintf (stderr, "Total Allocated under 128B:              "
               PRsa (9) "\n",
               SIZE_AMOUNT (G.stats.total_allocated_under128));
+      fprintf (stderr, "Number of free list fallbacks:           "
+              PRsa (9) "\n",
+              SIZE_AMOUNT (G.stats.fallback));
 
       for (i = 0; i < NUM_ORDERS; i++)
        if (G.stats.total_allocated_per_order[i])
@@ -2677,6 +2750,7 @@ ggc_pch_read (FILE *f, void *addr)
                                            - sizeof (long)
                                            + BITMAP_SIZE (num_objs + 1)));
       entry->bytes = bytes;
+      entry->free_list = find_free_list (bytes);
       entry->page = offs;
       entry->context_depth = 0;
       offs += bytes;
-- 
2.49.0
