On Mon, 29 Jul 2019, Richard Biener wrote:

> 
> The following patch addresses appearant quadraticness in PTAs
> SCC merging algorithm which does (simplified)
> 
>  for (node N in SCC)
>    bitmap_ior_into (pred(leader), pred(N));
> 
> with the linked list implementation this leads to increasingly large
> amount of walking for pred(leader).
> 
> The patch changes this to
> 
>  while (split /= 2)
>    for (node N in lower-half-of-SCC)
>      bitmap_ior_into_and_free (pred (N), pred (N in upper half of SCC));
> 
> so in each outer iteration reducing the number of bitmaps by a factor
> of two and thus hopefully (and in practice for the testcase) reducing
> the intermediate bitmap sizes we work on.
> 
> This improves
> 
>  tree PTA                           :   8.38 ( 23%)   0.22 ( 33%)   8.62 ( 
> 24%)    7679 kB (  1%)
> 
> to
> 
>  tree PTA                           :   5.30 ( 16%)   0.22 ( 32%)   5.54 ( 
> 16%)   28081 kB (  4%)
> 
> for the reduced testcase.
> 
> I somehow didn't manage a 1:1 conversion so I need to re-check details
> here still I'd like to hear comments about the new bitmap API
> which is a source-destructive variant for bitmap_ior_into, instead of
> copying elements not in A moving them from B and in the end releasing
> the rest.  Note using this API with keeping the merging as-is doesn't
> help PTA time.

This is what I applied.

Bootstrapped / tested on x86_64-unknown-linux-gnu.

Richard.

2019-07-30  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/91257
        * bitmap.h (bitmap_ior_into_and_free): Declare.
        * bitmap.c (bitmap_list_unlink_element): Add defaulted param
        whether to add the unliked element to the freelist.
        (bitmap_list_insert_element_after): Add defaulted param for
        an already allocated element.
        (bitmap_ior_into_and_free): New function.
        * tree-ssa-structalias.c (condense_visit): Reduce the
        ponts-to and edge bitmaps of the SCC members in a
        logarithmic fashion rather than all to one.

Index: gcc/bitmap.c
===================================================================
--- gcc/bitmap.c        (revision 273795)
+++ gcc/bitmap.c        (working copy)
@@ -252,7 +252,8 @@ bitmap_list_link_element (bitmap head, b
    and return it to the freelist.  */
 
 static inline void
-bitmap_list_unlink_element (bitmap head, bitmap_element *element)
+bitmap_list_unlink_element (bitmap head, bitmap_element *element,
+                           bool to_freelist = true)
 {
   bitmap_element *next = element->next;
   bitmap_element *prev = element->prev;
@@ -279,18 +280,21 @@ bitmap_list_unlink_element (bitmap head,
        head->indx = 0;
     }
 
-  bitmap_elem_to_freelist (head, element);
+  if (to_freelist)
+    bitmap_elem_to_freelist (head, element);
 }
 
-/* Insert a new uninitialized element into bitmap HEAD after element
-   ELT.  If ELT is NULL, insert the element at the start.  Return the
-   new element.  */
+/* Insert a new uninitialized element (or NODE if not NULL) into bitmap
+   HEAD after element ELT.  If ELT is NULL, insert the element at the start.
+   Return the new element.  */
 
 static bitmap_element *
 bitmap_list_insert_element_after (bitmap head,
-                                 bitmap_element *elt, unsigned int indx)
+                                 bitmap_element *elt, unsigned int indx,
+                                 bitmap_element *node = NULL)
 {
-  bitmap_element *node = bitmap_element_allocate (head);
+  if (!node)
+    node = bitmap_element_allocate (head);
   node->indx = indx;
 
   gcc_checking_assert (!head->tree_form);
@@ -2009,6 +2013,56 @@ bitmap_ior_into (bitmap a, const_bitmap
   return changed;
 }
 
+/* A |= B.  Return true if A changes.  Free B (re-using its storage
+   for the result).  */
+
+bool
+bitmap_ior_into_and_free (bitmap a, bitmap *b_)
+{
+  bitmap b = *b_;
+  bitmap_element *a_elt = a->first;
+  bitmap_element *b_elt = b->first;
+  bitmap_element *a_prev = NULL;
+  bitmap_element **a_prev_pnext = &a->first;
+  bool changed = false;
+
+  gcc_checking_assert (!a->tree_form && !b->tree_form);
+  gcc_assert (a->obstack == b->obstack);
+  if (a == b)
+    return false;
+
+  while (b_elt)
+    {
+      /* If A lags behind B, just advance it.  */
+      if (!a_elt || a_elt->indx == b_elt->indx)
+       {
+         changed = bitmap_elt_ior (a, a_elt, a_prev, a_elt, b_elt, changed);
+         b_elt = b_elt->next;
+       }
+      else if (a_elt->indx > b_elt->indx)
+       {
+         bitmap_element *b_elt_next = b_elt->next;
+         bitmap_list_unlink_element (b, b_elt, false);
+         bitmap_list_insert_element_after (a, a_prev, b_elt->indx, b_elt);
+         b_elt = b_elt_next;
+       }
+
+      a_prev = *a_prev_pnext;
+      a_prev_pnext = &a_prev->next;
+      a_elt = *a_prev_pnext;
+    }
+
+  gcc_checking_assert (!a->current == !a->first);
+  if (a->current)
+    a->indx = a->current->indx;
+
+  if (b->obstack)
+    BITMAP_FREE (*b_);
+  else
+    bitmap_clear (b);
+  return changed;
+}
+
 /* DST = A ^ B  */
 
 void
Index: gcc/bitmap.h
===================================================================
--- gcc/bitmap.h        (revision 273795)
+++ gcc/bitmap.h        (working copy)
@@ -399,6 +399,7 @@ extern void bitmap_clear_range (bitmap,
 extern void bitmap_set_range (bitmap, unsigned int, unsigned int);
 extern bool bitmap_ior (bitmap, const_bitmap, const_bitmap);
 extern bool bitmap_ior_into (bitmap, const_bitmap);
+extern bool bitmap_ior_into_and_free (bitmap, bitmap *);
 extern void bitmap_xor (bitmap, const_bitmap, const_bitmap);
 extern void bitmap_xor_into (bitmap, const_bitmap);
 
Index: gcc/tree-ssa-structalias.c
===================================================================
--- gcc/tree-ssa-structalias.c  (revision 273795)
+++ gcc/tree-ssa-structalias.c  (working copy)
@@ -2069,36 +2069,73 @@ condense_visit (constraint_graph_t graph
   /* See if any components have been identified.  */
   if (si->dfs[n] == my_dfs)
     {
-      while (si->scc_stack.length () != 0
-            && si->dfs[si->scc_stack.last ()] >= my_dfs)
+      if (si->scc_stack.length () != 0
+         && si->dfs[si->scc_stack.last ()] >= my_dfs)
        {
-         unsigned int w = si->scc_stack.pop ();
-         si->node_mapping[w] = n;
-
-         if (!bitmap_bit_p (graph->direct_nodes, w))
-           bitmap_clear_bit (graph->direct_nodes, n);
-
-         /* Unify our nodes.  */
-         if (graph->preds[w])
-           {
-             if (!graph->preds[n])
-               graph->preds[n] = BITMAP_ALLOC (&predbitmap_obstack);
-             bitmap_ior_into (graph->preds[n], graph->preds[w]);
-           }
-         if (graph->implicit_preds[w])
+         /* Find the first node of the SCC and do non-bitmap work.  */
+         bool direct_p = true;
+         unsigned first = si->scc_stack.length ();
+         do
            {
-             if (!graph->implicit_preds[n])
-               graph->implicit_preds[n] = BITMAP_ALLOC (&predbitmap_obstack);
-             bitmap_ior_into (graph->implicit_preds[n],
-                              graph->implicit_preds[w]);
+             --first;
+             unsigned int w = si->scc_stack[first];
+             si->node_mapping[w] = n;
+             if (!bitmap_bit_p (graph->direct_nodes, w))
+               direct_p = false;
            }
-         if (graph->points_to[w])
+         while (first > 0
+                && si->dfs[si->scc_stack[first - 1]] >= my_dfs);
+         if (!direct_p)
+           bitmap_clear_bit (graph->direct_nodes, n);
+
+         /* Want to reduce to node n, push that first.  */
+         si->scc_stack.reserve (1);
+         si->scc_stack.quick_push (si->scc_stack[first]);
+         si->scc_stack[first] = n;
+
+         unsigned scc_size = si->scc_stack.length () - first;
+         unsigned split = scc_size / 2;
+         unsigned carry = scc_size - split * 2;
+         while (split > 0)
            {
-             if (!graph->points_to[n])
-               graph->points_to[n] = BITMAP_ALLOC (&predbitmap_obstack);
-             bitmap_ior_into (graph->points_to[n],
-                              graph->points_to[w]);
+             for (unsigned i = 0; i < split; ++i)
+               {
+                 unsigned a = si->scc_stack[first + i];
+                 unsigned b = si->scc_stack[first + split + carry + i];
+
+                 /* Unify our nodes.  */
+                 if (graph->preds[b])
+                   {
+                     if (!graph->preds[a])
+                       std::swap (graph->preds[a], graph->preds[b]);
+                     else
+                       bitmap_ior_into_and_free (graph->preds[a],
+                                                 &graph->preds[b]);
+                   }
+                 if (graph->implicit_preds[b])
+                   {
+                     if (!graph->implicit_preds[a])
+                       std::swap (graph->implicit_preds[a],
+                                  graph->implicit_preds[b]);
+                     else
+                       bitmap_ior_into_and_free (graph->implicit_preds[a],
+                                                 &graph->implicit_preds[b]);
+                   }
+                 if (graph->points_to[b])
+                   {
+                     if (!graph->points_to[a])
+                       std::swap (graph->points_to[a], graph->points_to[b]);
+                     else
+                       bitmap_ior_into_and_free (graph->points_to[a],
+                                                 &graph->points_to[b]);
+                   }
+               }
+             unsigned remain = split + carry;
+             split = remain / 2;
+             carry = remain - split * 2;
            }
+         /* Actually pop the SCC.  */
+         si->scc_stack.truncate (first);
        }
       bitmap_set_bit (si->deleted, n);
     }

Reply via email to