On Wed, Mar 23, 2016 at 8:49 AM, Jeff Law <l...@redhat.com> wrote: > > This patch contains two parts. First is a bit of raw infrastructure in > bitmap.c. That change factors out the code to count the bits set in a given > BITMAP_WORD and uses that new function from bitmap_count_bits. > > It also introduces bitmap_count_unique_bits which counts the unique bits set > across two bitmaps. > > > The second patch introduces the new tiebreaker in the coalescer and utilizes > the new bitmap_count_unique_bits in the process. The major change since V1 > is lazily counting those bits only when the major sort keys are the same for > two coalesce_pairs. And we include the data for the new heuristic in the > debugging dumps. > > As I feared building a good testcase for this is a major PITA. Testing for > a specific set of coalescings is not easy at all and IMHO is going to be way > too fragile. > > I did verify by hand that the test in 64058 and its duplicate were fixed by > various iterations of this patch using trunk compilers reported in those > BZs. Spot checking other tests showed a small, but consistent improvement > in the generated code (fewer copies). > > Bootstrapped and regression tested on x86-64-linux-gnu. OK for the trunk > now?
+ssa_conflicts *conflicts_; +var_map map_; please make those 'static'. Ok with this change. Thanks, Richard. > Jeff > > > commit 716d40d9b853795f007476936f2ac06851d201e0 > Author: Jeff Law <l...@tor.usersys.redhat.com> > Date: Wed Mar 16 16:56:03 2016 -0400 > > PR tree-optimization/64058 > * bitmap.c (bitmap_count_unique_bits): New function. > (bitmap_count_bits_in_word): New function, extracted from > bitmap_count_bits. > (bitmap_count_bits): Use bitmap_count_bits_in_word. > * bitmap.h (bitmap_count_unique_bits): Declare it. > > diff --git a/gcc/ChangeLog b/gcc/ChangeLog > index 6f52c2d..05389b6 100644 > --- a/gcc/ChangeLog > +++ b/gcc/ChangeLog > @@ -1,5 +1,12 @@ > 2016-03-18 Jeff Law <l...@redhat.com> > > + PR tree-optimization/64058 > + * bitmap.c (bitmap_count_unique_bits): New function. > + (bitmap_count_bits_in_word): New function, extracted from > + bitmap_count_bits. > + (bitmap_count_bits): Use bitmap_count_bits_in_word. > + * bitmap.h (bitmap_count_unique_bits): Declare it. > + > PR rtl-optimization/70263 > * ira.c (memref_used_between_p): Assert we found END in the insn > chain. > (update_equiv_regs): When trying to move a store to after the insn > diff --git a/gcc/bitmap.c b/gcc/bitmap.c > index ac20ae5..0c05512 100644 > --- a/gcc/bitmap.c > +++ b/gcc/bitmap.c > @@ -662,6 +662,26 @@ bitmap_popcount (BITMAP_WORD a) > return ret; > } > #endif > + > +/* Count and return the number of bits set in the bitmap word BITS. */ > +static unsigned long > +bitmap_count_bits_in_word (const BITMAP_WORD *bits) > +{ > + unsigned long count = 0; > + > + for (unsigned ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++) > + { > +#if GCC_VERSION >= 3400 > + /* Note that popcountl matches BITMAP_WORD in type, so the actual > size > + of BITMAP_WORD is not material. */ > + count += __builtin_popcountl (bits[ix]); > +#else > + count += bitmap_popcount (bits[ix]); > +#endif > + } > + return count; > +} > + > /* Count the number of bits set in the bitmap, and return it. */ > > unsigned long > @@ -669,19 +689,44 @@ bitmap_count_bits (const_bitmap a) > { > unsigned long count = 0; > const bitmap_element *elt; > - unsigned ix; > > for (elt = a->first; elt; elt = elt->next) > + count += bitmap_count_bits_in_word (elt->bits); > + > + return count; > +} > + > +/* Count the number of unique bits set in A and B and return it. */ > + > +unsigned long > +bitmap_count_unique_bits (const_bitmap a, const_bitmap b) > +{ > + unsigned long count = 0; > + const bitmap_element *elt_a, *elt_b; > + > + for (elt_a = a->first, elt_b = b->first; elt_a && elt_b; ) > { > - for (ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++) > + /* If we're at different indices, then count all the bits > + in the lower element. If we're at the same index, then > + count the bits in the IOR of the two elements. */ > + if (elt_a->indx < elt_b->indx) > { > -#if GCC_VERSION >= 3400 > - /* Note that popcountl matches BITMAP_WORD in type, so the actual > size > - of BITMAP_WORD is not material. */ > - count += __builtin_popcountl (elt->bits[ix]); > -#else > - count += bitmap_popcount (elt->bits[ix]); > -#endif > + count += bitmap_count_bits_in_word (elt_a->bits); > + elt_a = elt_a->next; > + } > + else if (elt_b->indx < elt_a->indx) > + { > + count += bitmap_count_bits_in_word (elt_b->bits); > + elt_b = elt_b->next; > + } > + else > + { > + BITMAP_WORD bits[BITMAP_ELEMENT_WORDS]; > + for (unsigned ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++) > + bits[ix] = elt_a->bits[ix] | elt_b->bits[ix]; > + count += bitmap_count_bits_in_word (bits); > + elt_a = elt_a->next; > + elt_b = elt_b->next; > } > } > return count; > diff --git a/gcc/bitmap.h b/gcc/bitmap.h > index 805e37e..1115711 100644 > --- a/gcc/bitmap.h > +++ b/gcc/bitmap.h > @@ -280,6 +280,9 @@ extern bool bitmap_single_bit_set_p (const_bitmap); > /* Count the number of bits set in the bitmap. */ > extern unsigned long bitmap_count_bits (const_bitmap); > > +/* Count the number of unique bits set across the two bitmaps. */ > +extern unsigned long bitmap_count_unique_bits (const_bitmap, const_bitmap); > + > /* Boolean operations on bitmaps. The _into variants are two operand > versions that modify the first source operand. The other variants > are three operand versions that to not destroy the source bitmaps. > > commit e49388235bf159ab76b95f5af90aa25c5e5813cd > Author: Jeff Law <l...@tor.usersys.redhat.com> > Date: Wed Mar 16 17:03:29 2016 -0400 > > And finally the new heuristic > > PR tree-optimization/64058 > * tree-ssa-coalesce.c (struct coalesce_pair): Add new field > CONFLICT_COUNT. > (struct ssa_conflicts): Move up earlier in the file. > (conflicts_, var_map_): New static variables. > (initialize_conflict_count): New function to initialize the > CONFLICT_COUNT field for each conflict pair. > (compare_pairs): Lazily initialize the conflict count and use it > as the first tie-breaker. > (sort_coalesce_list): Add new arguments conflicts, map. Initialize > and wipe conflicts_ and map_ around the call to qsort. Remove > special case for 2 coalesce pairs. > (find_coalesce_pair): Initialize CONFLICT_COUNT field. > (dump_coalesce_list): Print the CONFLICT_COUNT field too. > * bitmap.c (bitmap_count_unique_bits): New function. > (bitmap_count_bits_in_word): New function, extracted from > bitmap_count_bits. > (bitmap_count_bits): Use bitmap_count_bits_in_word. > * bitmap.h (bitmap_count_unique_bits): Declare it. > > diff --git a/gcc/ChangeLog b/gcc/ChangeLog > index 05389b6..96f1f11 100644 > --- a/gcc/ChangeLog > +++ b/gcc/ChangeLog > @@ -1,6 +1,17 @@ > 2016-03-18 Jeff Law <l...@redhat.com> > > PR tree-optimization/64058 > + * tree-ssa-coalesce.c (struct coalesce_pair): Add new field > + CONFLICT_COUNT. > + (struct ssa_conflicts): Move up earlier in the file. > + (conflicts_, var_map_): New static variables. > + (initialize_conflict_count): New function to initialize the > + CONFLICT_COUNT field for each conflict pair. > + (compare_pairs): Lazily initialize the conflict count and use it > + as the first tie-breaker. > + (sort_coalesce_list): Add new arguments conflicts, map. Initialize > + and wipe conflicts_ and map_ around the call to qsort. Remove > + special case for 2 coalesce pairs. > * bitmap.c (bitmap_count_unique_bits): New function. > (bitmap_count_bits_in_word): New function, extracted from > bitmap_count_bits. > diff --git a/gcc/tree-ssa-coalesce.c b/gcc/tree-ssa-coalesce.c > index e49876e..7120d29 100644 > --- a/gcc/tree-ssa-coalesce.c > +++ b/gcc/tree-ssa-coalesce.c > @@ -51,12 +51,40 @@ struct coalesce_pair > int second_element; > int cost; > > + /* A count of the number of unique partitions this pair would conflict > + with if coalescing was successful. This is the secondary sort key, > + given two pairs with equal costs, we will prefer the pair with a > smaller > + conflict set. > + > + This is lazily initialized when we discover two coalescing pairs have > + the same primary cost. > + > + Note this is not updated and propagated as pairs are coalesced. */ > + int conflict_count; > + > /* The order in which coalescing pairs are discovered is recorded in this > field, which is used as the final tie breaker when sorting coalesce > pairs. */ > int index; > }; > > +/* This represents a conflict graph. Implemented as an array of bitmaps. > + A full matrix is used for conflicts rather than just upper triangular > form. > + this make sit much simpler and faster to perform conflict merges. */ > + > +struct ssa_conflicts > +{ > + bitmap_obstack obstack; /* A place to allocate our bitmaps. */ > + vec<bitmap> conflicts; > +}; > + > +/* The narrow API of the qsort comparison function doesn't allow easy > + access to additional arguments. So we have two globals (ick) to hold > + the data we need. They're initialized before the call to qsort and > + wiped immediately after. */ > +ssa_conflicts *conflicts_; > +var_map map_; > + > /* Coalesce pair hashtable helpers. */ > > struct coalesce_pair_hasher : nofree_ptr_hash <coalesce_pair> > @@ -303,6 +331,7 @@ find_coalesce_pair (coalesce_list *cl, int p1, int p2, > bool create) > pair->second_element = p.second_element; > pair->cost = 0; > pair->index = num_coalesce_pairs (cl); > + pair->conflict_count = 0; > *slot = pair; > } > > @@ -345,21 +374,66 @@ add_coalesce (coalesce_list *cl, int p1, int p2, int > value) > } > } > > +/* Compute and record how many unique conflicts would exist for the > + representative partition for each coalesce pair in CL. > + > + CONFLICTS is the conflict graph and MAP is the current partition view. > */ > + > +static void > +initialize_conflict_count (coalesce_pair *p, > + ssa_conflicts *conflicts, > + var_map map) > +{ > + int p1 = var_to_partition (map, ssa_name (p->first_element)); > + int p2 = var_to_partition (map, ssa_name (p->second_element)); > + > + /* 4 cases. If both P1 and P2 have conflicts, then build their > + union and count the members. Else handle the degenerate cases > + in the obvious ways. */ > + if (conflicts->conflicts[p1] && conflicts->conflicts[p2]) > + p->conflict_count = bitmap_count_unique_bits (conflicts->conflicts[p1], > + conflicts->conflicts[p2]); > + else if (conflicts->conflicts[p1]) > + p->conflict_count = bitmap_count_bits (conflicts->conflicts[p1]); > + else if (conflicts->conflicts[p2]) > + p->conflict_count = bitmap_count_bits (conflicts->conflicts[p2]); > + else > + p->conflict_count = 0; > +} > + > > /* Comparison function to allow qsort to sort P1 and P2 in Ascending order. > */ > > static int > compare_pairs (const void *p1, const void *p2) > { > - const coalesce_pair *const *const pp1 = (const coalesce_pair *const *) > p1; > - const coalesce_pair *const *const pp2 = (const coalesce_pair *const *) > p2; > + coalesce_pair *const *const pp1 = (coalesce_pair *const *) p1; > + coalesce_pair *const *const pp2 = (coalesce_pair *const *) p2; > int result; > > result = (* pp1)->cost - (* pp2)->cost; > - /* And if everything else is equal, then sort based on which > - coalesce pair was found first. */ > + /* We use the size of the resulting conflict set as the secondary sort > key. > + Given two equal costing coalesce pairs, we want to prefer the pair > that > + has the smaller conflict set. */ > if (result == 0) > - result = (*pp2)->index - (*pp1)->index; > + { > + if (flag_expensive_optimizations) > + { > + /* Lazily initialize the conflict counts as it's fairly expensive > + to compute. */ > + if ((*pp2)->conflict_count == 0) > + initialize_conflict_count (*pp2, conflicts_, map_); > + if ((*pp1)->conflict_count == 0) > + initialize_conflict_count (*pp1, conflicts_, map_); > + > + result = (*pp2)->conflict_count - (*pp1)->conflict_count; > + } > + > + /* And if everything else is equal, then sort based on which > + coalesce pair was found first. */ > + if (result == 0) > + result = (*pp2)->index - (*pp1)->index; > + } > > return result; > } > @@ -374,7 +448,7 @@ compare_pairs (const void *p1, const void *p2) > in order from most important coalesce to least important. */ > > static void > -sort_coalesce_list (coalesce_list *cl) > +sort_coalesce_list (coalesce_list *cl, ssa_conflicts *conflicts, var_map > map) > { > unsigned x, num; > coalesce_pair *p; > @@ -400,19 +474,14 @@ sort_coalesce_list (coalesce_list *cl) > if (num == 1) > return; > > - /* If there are only 2, just pick swap them if the order isn't correct. > */ > - if (num == 2) > - { > - if (cl->sorted[0]->cost > cl->sorted[1]->cost) > - std::swap (cl->sorted[0], cl->sorted[1]); > - return; > - } > - > - /* Only call qsort if there are more than 2 items. > - ??? Maybe std::sort will do better, provided that compare_pairs > - can be inlined. */ > - if (num > 2) > - qsort (cl->sorted, num, sizeof (coalesce_pair *), compare_pairs); > + /* We don't want to depend on qsort_r, so we have to stuff away > + additional data into globals so it can be referenced in > + compare_pairs. */ > + conflicts_ = conflicts; > + map_ = map; > + qsort (cl->sorted, num, sizeof (coalesce_pair *), compare_pairs); > + conflicts_ = NULL; > + map_ = NULL; > } > > > @@ -437,7 +506,7 @@ dump_coalesce_list (FILE *f, coalesce_list *cl) > print_generic_expr (f, var1, TDF_SLIM); > fprintf (f, " <-> "); > print_generic_expr (f, var2, TDF_SLIM); > - fprintf (f, " (%1d), ", node->cost); > + fprintf (f, " (%1d, %1d), ", node->cost, node->conflict_count); > fprintf (f, "\n"); > } > } > @@ -447,7 +516,7 @@ dump_coalesce_list (FILE *f, coalesce_list *cl) > for (x = cl->num_sorted - 1 ; x >=0; x--) > { > node = cl->sorted[x]; > - fprintf (f, "(%d) ", node->cost); > + fprintf (f, "(%d, %d) ", node->cost, node->conflict_count); > var = ssa_name (node->first_element); > print_generic_expr (f, var, TDF_SLIM); > fprintf (f, " <-> "); > @@ -459,16 +528,6 @@ dump_coalesce_list (FILE *f, coalesce_list *cl) > } > > > -/* This represents a conflict graph. Implemented as an array of bitmaps. > - A full matrix is used for conflicts rather than just upper triangular > form. > - this make sit much simpler and faster to perform conflict merges. */ > - > -struct ssa_conflicts > -{ > - bitmap_obstack obstack; /* A place to allocate our bitmaps. */ > - vec<bitmap> conflicts; > -}; > - > /* Return an empty new conflict graph for SIZE elements. */ > > static inline ssa_conflicts * > @@ -1800,7 +1859,7 @@ coalesce_ssa_name (void) > if (dump_file && (dump_flags & TDF_DETAILS)) > ssa_conflicts_dump (dump_file, graph); > > - sort_coalesce_list (cl); > + sort_coalesce_list (cl, graph, map); > > if (dump_file && (dump_flags & TDF_DETAILS)) > { >