The following patch addresses appearant quadraticness in PTAs SCC merging algorithm which does (simplified)
for (node N in SCC) bitmap_ior_into (pred(leader), pred(N)); with the linked list implementation this leads to increasingly large amount of walking for pred(leader). The patch changes this to while (split /= 2) for (node N in lower-half-of-SCC) bitmap_ior_into_and_free (pred (N), pred (N in upper half of SCC)); so in each outer iteration reducing the number of bitmaps by a factor of two and thus hopefully (and in practice for the testcase) reducing the intermediate bitmap sizes we work on. This improves tree PTA : 8.38 ( 23%) 0.22 ( 33%) 8.62 ( 24%) 7679 kB ( 1%) to tree PTA : 5.30 ( 16%) 0.22 ( 32%) 5.54 ( 16%) 28081 kB ( 4%) for the reduced testcase. I somehow didn't manage a 1:1 conversion so I need to re-check details here still I'd like to hear comments about the new bitmap API which is a source-destructive variant for bitmap_ior_into, instead of copying elements not in A moving them from B and in the end releasing the rest. Note using this API with keeping the merging as-is doesn't help PTA time. Richard. 2019-07-29 Richard Biener <rguent...@suse.de> PR tree-optimization/91257 * bitmap.h (bitmap_ior_into_and_free): Declare. * bitmap.c (bitmap_list_unlink_element): Add defaulted param whether to add the unliked element to the freelist. (bitmap_list_insert_element_after): Add defaulted param for an already allocated element. (bitmap_ior_into_and_free): New function. * tree-ssa-structalias.c (condense_visit): Reduce the predecessor and edge bitmaps of the SCC members in a logarithmic fashion rather than all to one. Index: gcc/bitmap.c =================================================================== --- gcc/bitmap.c (revision 273795) +++ gcc/bitmap.c (working copy) @@ -252,7 +252,8 @@ bitmap_list_link_element (bitmap head, b and return it to the freelist. */ static inline void -bitmap_list_unlink_element (bitmap head, bitmap_element *element) +bitmap_list_unlink_element (bitmap head, bitmap_element *element, + bool to_freelist = true) { bitmap_element *next = element->next; bitmap_element *prev = element->prev; @@ -279,18 +280,21 @@ bitmap_list_unlink_element (bitmap head, head->indx = 0; } - bitmap_elem_to_freelist (head, element); + if (to_freelist) + bitmap_elem_to_freelist (head, element); } -/* Insert a new uninitialized element into bitmap HEAD after element - ELT. If ELT is NULL, insert the element at the start. Return the - new element. */ +/* Insert a new uninitialized element (or NODE if not NULL) into bitmap + HEAD after element ELT. If ELT is NULL, insert the element at the start. + Return the new element. */ static bitmap_element * bitmap_list_insert_element_after (bitmap head, - bitmap_element *elt, unsigned int indx) + bitmap_element *elt, unsigned int indx, + bitmap_element *node = NULL) { - bitmap_element *node = bitmap_element_allocate (head); + if (!node) + node = bitmap_element_allocate (head); node->indx = indx; gcc_checking_assert (!head->tree_form); @@ -2009,6 +2013,56 @@ bitmap_ior_into (bitmap a, const_bitmap return changed; } +/* A |= B. Return true if A changes. Free B (re-using its storage + for the result). */ + +bool +bitmap_ior_into_and_free (bitmap a, bitmap *b_) +{ + bitmap b = *b_; + bitmap_element *a_elt = a->first; + bitmap_element *b_elt = b->first; + bitmap_element *a_prev = NULL; + bitmap_element **a_prev_pnext = &a->first; + bool changed = false; + + gcc_checking_assert (!a->tree_form && !b->tree_form); + gcc_assert (a->obstack == b->obstack); + if (a == b) + return false; + + while (b_elt) + { + /* If A lags behind B, just advance it. */ + if (!a_elt || a_elt->indx == b_elt->indx) + { + changed = bitmap_elt_ior (a, a_elt, a_prev, a_elt, b_elt, changed); + b_elt = b_elt->next; + } + else if (a_elt->indx > b_elt->indx) + { + bitmap_element *b_elt_next = b_elt->next; + bitmap_list_unlink_element (b, b_elt, false); + bitmap_list_insert_element_after (a, a_prev, b_elt->indx, b_elt); + b_elt = b_elt_next; + } + + a_prev = *a_prev_pnext; + a_prev_pnext = &a_prev->next; + a_elt = *a_prev_pnext; + } + + gcc_checking_assert (!a->current == !a->first); + if (a->current) + a->indx = a->current->indx; + + if (b->obstack) + BITMAP_FREE (*b_); + else + bitmap_clear (b); + return changed; +} + /* DST = A ^ B */ void Index: gcc/bitmap.h =================================================================== --- gcc/bitmap.h (revision 273795) +++ gcc/bitmap.h (working copy) @@ -399,6 +399,7 @@ extern void bitmap_clear_range (bitmap, extern void bitmap_set_range (bitmap, unsigned int, unsigned int); extern bool bitmap_ior (bitmap, const_bitmap, const_bitmap); extern bool bitmap_ior_into (bitmap, const_bitmap); +extern bool bitmap_ior_into_and_free (bitmap, bitmap *); extern void bitmap_xor (bitmap, const_bitmap, const_bitmap); extern void bitmap_xor_into (bitmap, const_bitmap); Index: gcc/tree-ssa-structalias.c =================================================================== --- gcc/tree-ssa-structalias.c (revision 273795) +++ gcc/tree-ssa-structalias.c (working copy) @@ -2069,36 +2069,72 @@ condense_visit (constraint_graph_t graph /* See if any components have been identified. */ if (si->dfs[n] == my_dfs) { - while (si->scc_stack.length () != 0 - && si->dfs[si->scc_stack.last ()] >= my_dfs) + if (si->scc_stack.length () != 0 + && si->dfs[si->scc_stack.last ()] >= my_dfs) { - unsigned int w = si->scc_stack.pop (); - si->node_mapping[w] = n; - - if (!bitmap_bit_p (graph->direct_nodes, w)) - bitmap_clear_bit (graph->direct_nodes, n); - - /* Unify our nodes. */ - if (graph->preds[w]) - { - if (!graph->preds[n]) - graph->preds[n] = BITMAP_ALLOC (&predbitmap_obstack); - bitmap_ior_into (graph->preds[n], graph->preds[w]); - } - if (graph->implicit_preds[w]) + /* Find the first node of the SCC and do non-bitmap work. */ + bool direct_p = true; + unsigned first = si->scc_stack.length (); + do { - if (!graph->implicit_preds[n]) - graph->implicit_preds[n] = BITMAP_ALLOC (&predbitmap_obstack); - bitmap_ior_into (graph->implicit_preds[n], - graph->implicit_preds[w]); + --first; + unsigned int w = si->scc_stack[first]; + si->node_mapping[w] = n; + if (!bitmap_bit_p (graph->direct_nodes, w)) + direct_p = false; } - if (graph->points_to[w]) + while (first > 0 + && si->dfs[--first] >= my_dfs); + if (!direct_p) + bitmap_clear_bit (graph->direct_nodes, n); + + /* Want to reduce to node n, push that first. */ + si->scc_stack.safe_push (si->scc_stack[first]); + si->scc_stack[first] = n; + + unsigned scc_size = si->scc_stack.length () - first; + unsigned split = scc_size / 2; + unsigned carry = scc_size - split * 2; + while (split > 0) { - if (!graph->points_to[n]) - graph->points_to[n] = BITMAP_ALLOC (&predbitmap_obstack); - bitmap_ior_into (graph->points_to[n], - graph->points_to[w]); + for (unsigned i = 0; i < split; ++i) + { + unsigned a = si->scc_stack[first + i]; + unsigned b = si->scc_stack[first + split + carry + i]; + + /* Unify our nodes. */ + if (graph->preds[b]) + { + if (!graph->preds[a]) + std::swap (graph->preds[a], graph->preds[b]); + else + bitmap_ior_into_and_free (graph->preds[a], + &graph->preds[b]); + } + if (graph->implicit_preds[b]) + { + if (!graph->implicit_preds[a]) + std::swap (graph->implicit_preds[a], + graph->implicit_preds[b]); + else + bitmap_ior_into_and_free (graph->implicit_preds[a], + &graph->implicit_preds[b]); + } + if (graph->points_to[b]) + { + if (!graph->points_to[a]) + std::swap (graph->points_to[a], graph->points_to[b]); + else + bitmap_ior_into_and_free (graph->points_to[a], + &graph->points_to[b]); + } + } + unsigned remain = split + carry; + split = remain / 2; + carry = remain - split * 2; } + /* Actually pop the SCC. */ + si->scc_stack.truncate (first); } bitmap_set_bit (si->deleted, n); }