On Wed, Mar 23, 2016 at 8:49 AM, Jeff Law <l...@redhat.com> wrote:
>
> This patch contains two parts.  First is a bit of raw infrastructure in
> bitmap.c.  That change factors out the code to count the bits set in a given
> BITMAP_WORD and uses that new function from bitmap_count_bits.
>
> It also introduces bitmap_count_unique_bits which counts the unique bits set
> across two bitmaps.
>
>
> The second patch introduces the new tiebreaker in the coalescer and utilizes
> the new bitmap_count_unique_bits in the process.  The major change since V1
> is lazily counting those bits only when the major sort keys are the same for
> two coalesce_pairs.  And we include the data for the new heuristic in the
> debugging dumps.
>
> As I feared building a good testcase for this is a major PITA.  Testing for
> a specific set of coalescings is not easy at all and IMHO is going to be way
> too fragile.
>
> I did verify by hand that the test in 64058 and its duplicate were fixed by
> various iterations of this patch using trunk compilers reported in those
> BZs.  Spot checking other tests showed a small, but consistent improvement
> in the generated code (fewer copies).
>
> Bootstrapped and regression tested on x86-64-linux-gnu.  OK for the trunk
> now?

+ssa_conflicts *conflicts_;
+var_map map_;

please make those 'static'.  Ok with this change.

Thanks,
Richard.

> Jeff
>
>
> commit 716d40d9b853795f007476936f2ac06851d201e0
> Author: Jeff Law <l...@tor.usersys.redhat.com>
> Date:   Wed Mar 16 16:56:03 2016 -0400
>
>         PR tree-optimization/64058
>         * bitmap.c (bitmap_count_unique_bits): New function.
>         (bitmap_count_bits_in_word): New function, extracted from
>         bitmap_count_bits.
>         (bitmap_count_bits): Use bitmap_count_bits_in_word.
>         * bitmap.h (bitmap_count_unique_bits): Declare it.
>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 6f52c2d..05389b6 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,5 +1,12 @@
>  2016-03-18  Jeff Law  <l...@redhat.com>
>
> +       PR tree-optimization/64058
> +       * bitmap.c (bitmap_count_unique_bits): New function.
> +       (bitmap_count_bits_in_word): New function, extracted from
> +       bitmap_count_bits.
> +       (bitmap_count_bits): Use bitmap_count_bits_in_word.
> +       * bitmap.h (bitmap_count_unique_bits): Declare it.
> +
>         PR rtl-optimization/70263
>         * ira.c (memref_used_between_p): Assert we found END in the insn
> chain.
>         (update_equiv_regs): When trying to move a store to after the insn
> diff --git a/gcc/bitmap.c b/gcc/bitmap.c
> index ac20ae5..0c05512 100644
> --- a/gcc/bitmap.c
> +++ b/gcc/bitmap.c
> @@ -662,6 +662,26 @@ bitmap_popcount (BITMAP_WORD a)
>    return ret;
>  }
>  #endif
> +
> +/* Count and return the number of bits set in the bitmap word BITS.  */
> +static unsigned long
> +bitmap_count_bits_in_word (const BITMAP_WORD *bits)
> +{
> +  unsigned long count = 0;
> +
> +  for (unsigned ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++)
> +    {
> +#if GCC_VERSION >= 3400
> +      /* Note that popcountl matches BITMAP_WORD in type, so the actual
> size
> +        of BITMAP_WORD is not material.  */
> +      count += __builtin_popcountl (bits[ix]);
> +#else
> +      count += bitmap_popcount (bits[ix]);
> +#endif
> +    }
> +  return count;
> +}
> +
>  /* Count the number of bits set in the bitmap, and return it.  */
>
>  unsigned long
> @@ -669,19 +689,44 @@ bitmap_count_bits (const_bitmap a)
>  {
>    unsigned long count = 0;
>    const bitmap_element *elt;
> -  unsigned ix;
>
>    for (elt = a->first; elt; elt = elt->next)
> +    count += bitmap_count_bits_in_word (elt->bits);
> +
> +  return count;
> +}
> +
> +/* Count the number of unique bits set in A and B and return it.  */
> +
> +unsigned long
> +bitmap_count_unique_bits (const_bitmap a, const_bitmap b)
> +{
> +  unsigned long count = 0;
> +  const bitmap_element *elt_a, *elt_b;
> +
> +  for (elt_a = a->first, elt_b = b->first; elt_a && elt_b; )
>      {
> -      for (ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++)
> +      /* If we're at different indices, then count all the bits
> +        in the lower element.  If we're at the same index, then
> +        count the bits in the IOR of the two elements.  */
> +      if (elt_a->indx < elt_b->indx)
>         {
> -#if GCC_VERSION >= 3400
> -         /* Note that popcountl matches BITMAP_WORD in type, so the actual
> size
> -        of BITMAP_WORD is not material.  */
> -         count += __builtin_popcountl (elt->bits[ix]);
> -#else
> -         count += bitmap_popcount (elt->bits[ix]);
> -#endif
> +         count += bitmap_count_bits_in_word (elt_a->bits);
> +         elt_a = elt_a->next;
> +       }
> +      else if (elt_b->indx < elt_a->indx)
> +       {
> +         count += bitmap_count_bits_in_word (elt_b->bits);
> +         elt_b = elt_b->next;
> +       }
> +      else
> +       {
> +         BITMAP_WORD bits[BITMAP_ELEMENT_WORDS];
> +         for (unsigned ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++)
> +           bits[ix] = elt_a->bits[ix] | elt_b->bits[ix];
> +         count += bitmap_count_bits_in_word (bits);
> +         elt_a = elt_a->next;
> +         elt_b = elt_b->next;
>         }
>      }
>    return count;
> diff --git a/gcc/bitmap.h b/gcc/bitmap.h
> index 805e37e..1115711 100644
> --- a/gcc/bitmap.h
> +++ b/gcc/bitmap.h
> @@ -280,6 +280,9 @@ extern bool bitmap_single_bit_set_p (const_bitmap);
>  /* Count the number of bits set in the bitmap.  */
>  extern unsigned long bitmap_count_bits (const_bitmap);
>
> +/* Count the number of unique bits set across the two bitmaps.  */
> +extern unsigned long bitmap_count_unique_bits (const_bitmap, const_bitmap);
> +
>  /* Boolean operations on bitmaps.  The _into variants are two operand
>     versions that modify the first source operand.  The other variants
>     are three operand versions that to not destroy the source bitmaps.
>
> commit e49388235bf159ab76b95f5af90aa25c5e5813cd
> Author: Jeff Law <l...@tor.usersys.redhat.com>
> Date:   Wed Mar 16 17:03:29 2016 -0400
>
>     And finally the new heuristic
>
>         PR tree-optimization/64058
>         * tree-ssa-coalesce.c (struct coalesce_pair): Add new field
>         CONFLICT_COUNT.
>         (struct ssa_conflicts): Move up earlier in the file.
>         (conflicts_, var_map_): New static variables.
>         (initialize_conflict_count): New function to initialize the
>         CONFLICT_COUNT field for each conflict pair.
>         (compare_pairs): Lazily initialize the conflict count and use it
>         as the first tie-breaker.
>         (sort_coalesce_list): Add new arguments conflicts, map.  Initialize
>         and wipe conflicts_ and map_ around the call to qsort.  Remove
>         special case for 2 coalesce pairs.
>         (find_coalesce_pair): Initialize CONFLICT_COUNT field.
>         (dump_coalesce_list): Print the CONFLICT_COUNT field too.
>         * bitmap.c (bitmap_count_unique_bits): New function.
>         (bitmap_count_bits_in_word): New function, extracted from
>         bitmap_count_bits.
>         (bitmap_count_bits): Use bitmap_count_bits_in_word.
>         * bitmap.h (bitmap_count_unique_bits): Declare it.
>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 05389b6..96f1f11 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,6 +1,17 @@
>  2016-03-18  Jeff Law  <l...@redhat.com>
>
>         PR tree-optimization/64058
> +       * tree-ssa-coalesce.c (struct coalesce_pair): Add new field
> +       CONFLICT_COUNT.
> +       (struct ssa_conflicts): Move up earlier in the file.
> +       (conflicts_, var_map_): New static variables.
> +       (initialize_conflict_count): New function to initialize the
> +       CONFLICT_COUNT field for each conflict pair.
> +       (compare_pairs): Lazily initialize the conflict count and use it
> +       as the first tie-breaker.
> +       (sort_coalesce_list): Add new arguments conflicts, map.  Initialize
> +       and wipe conflicts_ and map_ around the call to qsort.  Remove
> +       special case for 2 coalesce pairs.
>         * bitmap.c (bitmap_count_unique_bits): New function.
>         (bitmap_count_bits_in_word): New function, extracted from
>         bitmap_count_bits.
> diff --git a/gcc/tree-ssa-coalesce.c b/gcc/tree-ssa-coalesce.c
> index e49876e..7120d29 100644
> --- a/gcc/tree-ssa-coalesce.c
> +++ b/gcc/tree-ssa-coalesce.c
> @@ -51,12 +51,40 @@ struct coalesce_pair
>    int second_element;
>    int cost;
>
> +  /* A count of the number of unique partitions this pair would conflict
> +     with if coalescing was successful.  This is the secondary sort key,
> +     given two pairs with equal costs, we will prefer the pair with a
> smaller
> +     conflict set.
> +
> +     This is lazily initialized when we discover two coalescing pairs have
> +     the same primary cost.
> +
> +     Note this is not updated and propagated as pairs are coalesced.  */
> +  int conflict_count;
> +
>    /* The order in which coalescing pairs are discovered is recorded in this
>       field, which is used as the final tie breaker when sorting coalesce
>       pairs.  */
>    int index;
>  };
>
> +/* This represents a conflict graph.  Implemented as an array of bitmaps.
> +   A full matrix is used for conflicts rather than just upper triangular
> form.
> +   this make sit much simpler and faster to perform conflict merges.  */
> +
> +struct ssa_conflicts
> +{
> +  bitmap_obstack obstack;      /* A place to allocate our bitmaps.  */
> +  vec<bitmap> conflicts;
> +};
> +
> +/* The narrow API of the qsort comparison function doesn't allow easy
> +   access to additional arguments.  So we have two globals (ick) to hold
> +   the data we need.  They're initialized before the call to qsort and
> +   wiped immediately after.  */
> +ssa_conflicts *conflicts_;
> +var_map map_;
> +
>  /* Coalesce pair hashtable helpers.  */
>
>  struct coalesce_pair_hasher : nofree_ptr_hash <coalesce_pair>
> @@ -303,6 +331,7 @@ find_coalesce_pair (coalesce_list *cl, int p1, int p2,
> bool create)
>        pair->second_element = p.second_element;
>        pair->cost = 0;
>        pair->index = num_coalesce_pairs (cl);
> +      pair->conflict_count = 0;
>        *slot = pair;
>      }
>
> @@ -345,21 +374,66 @@ add_coalesce (coalesce_list *cl, int p1, int p2, int
> value)
>      }
>  }
>
> +/* Compute and record how many unique conflicts would exist for the
> +   representative partition for each coalesce pair in CL.
> +
> +   CONFLICTS is the conflict graph and MAP is the current partition view.
> */
> +
> +static void
> +initialize_conflict_count (coalesce_pair *p,
> +                          ssa_conflicts *conflicts,
> +                          var_map map)
> +{
> +  int p1 = var_to_partition (map, ssa_name (p->first_element));
> +  int p2 = var_to_partition (map, ssa_name (p->second_element));
> +
> +  /* 4 cases.  If both P1 and P2 have conflicts, then build their
> +     union and count the members.  Else handle the degenerate cases
> +     in the obvious ways.  */
> +  if (conflicts->conflicts[p1] && conflicts->conflicts[p2])
> +    p->conflict_count = bitmap_count_unique_bits (conflicts->conflicts[p1],
> +                                                 conflicts->conflicts[p2]);
> +  else if (conflicts->conflicts[p1])
> +    p->conflict_count = bitmap_count_bits (conflicts->conflicts[p1]);
> +  else if (conflicts->conflicts[p2])
> +    p->conflict_count = bitmap_count_bits (conflicts->conflicts[p2]);
> +  else
> +    p->conflict_count = 0;
> +}
> +
>
>  /* Comparison function to allow qsort to sort P1 and P2 in Ascending order.
> */
>
>  static int
>  compare_pairs (const void *p1, const void *p2)
>  {
> -  const coalesce_pair *const *const pp1 = (const coalesce_pair *const *)
> p1;
> -  const coalesce_pair *const *const pp2 = (const coalesce_pair *const *)
> p2;
> +  coalesce_pair *const *const pp1 = (coalesce_pair *const *) p1;
> +  coalesce_pair *const *const pp2 = (coalesce_pair *const *) p2;
>    int result;
>
>    result = (* pp1)->cost - (* pp2)->cost;
> -  /* And if everything else is equal, then sort based on which
> -     coalesce pair was found first.  */
> +  /* We use the size of the resulting conflict set as the secondary sort
> key.
> +     Given two equal costing coalesce pairs, we want to prefer the pair
> that
> +     has the smaller conflict set.  */
>    if (result == 0)
> -    result = (*pp2)->index - (*pp1)->index;
> +    {
> +      if (flag_expensive_optimizations)
> +       {
> +         /* Lazily initialize the conflict counts as it's fairly expensive
> +            to compute.  */
> +         if ((*pp2)->conflict_count == 0)
> +           initialize_conflict_count (*pp2, conflicts_, map_);
> +         if ((*pp1)->conflict_count == 0)
> +           initialize_conflict_count (*pp1, conflicts_, map_);
> +
> +         result = (*pp2)->conflict_count - (*pp1)->conflict_count;
> +       }
> +
> +      /* And if everything else is equal, then sort based on which
> +        coalesce pair was found first.  */
> +      if (result == 0)
> +       result = (*pp2)->index - (*pp1)->index;
> +    }
>
>    return result;
>  }
> @@ -374,7 +448,7 @@ compare_pairs (const void *p1, const void *p2)
>     in order from most important coalesce to least important.  */
>
>  static void
> -sort_coalesce_list (coalesce_list *cl)
> +sort_coalesce_list (coalesce_list *cl, ssa_conflicts *conflicts, var_map
> map)
>  {
>    unsigned x, num;
>    coalesce_pair *p;
> @@ -400,19 +474,14 @@ sort_coalesce_list (coalesce_list *cl)
>    if (num == 1)
>      return;
>
> -  /* If there are only 2, just pick swap them if the order isn't correct.
> */
> -  if (num == 2)
> -    {
> -      if (cl->sorted[0]->cost > cl->sorted[1]->cost)
> -       std::swap (cl->sorted[0], cl->sorted[1]);
> -      return;
> -    }
> -
> -  /* Only call qsort if there are more than 2 items.
> -     ??? Maybe std::sort will do better, provided that compare_pairs
> -     can be inlined.  */
> -  if (num > 2)
> -      qsort (cl->sorted, num, sizeof (coalesce_pair *), compare_pairs);
> +  /* We don't want to depend on qsort_r, so we have to stuff away
> +     additional data into globals so it can be referenced in
> +     compare_pairs.  */
> +  conflicts_ = conflicts;
> +  map_ = map;
> +  qsort (cl->sorted, num, sizeof (coalesce_pair *), compare_pairs);
> +  conflicts_ = NULL;
> +  map_ = NULL;
>  }
>
>
> @@ -437,7 +506,7 @@ dump_coalesce_list (FILE *f, coalesce_list *cl)
>           print_generic_expr (f, var1, TDF_SLIM);
>           fprintf (f, " <-> ");
>           print_generic_expr (f, var2, TDF_SLIM);
> -         fprintf (f, "  (%1d), ", node->cost);
> +         fprintf (f, "  (%1d, %1d), ", node->cost, node->conflict_count);
>           fprintf (f, "\n");
>         }
>      }
> @@ -447,7 +516,7 @@ dump_coalesce_list (FILE *f, coalesce_list *cl)
>        for (x = cl->num_sorted - 1 ; x >=0; x--)
>          {
>           node = cl->sorted[x];
> -         fprintf (f, "(%d) ", node->cost);
> +         fprintf (f, "(%d, %d) ", node->cost, node->conflict_count);
>           var = ssa_name (node->first_element);
>           print_generic_expr (f, var, TDF_SLIM);
>           fprintf (f, " <-> ");
> @@ -459,16 +528,6 @@ dump_coalesce_list (FILE *f, coalesce_list *cl)
>  }
>
>
> -/* This represents a conflict graph.  Implemented as an array of bitmaps.
> -   A full matrix is used for conflicts rather than just upper triangular
> form.
> -   this make sit much simpler and faster to perform conflict merges.  */
> -
> -struct ssa_conflicts
> -{
> -  bitmap_obstack obstack;      /* A place to allocate our bitmaps.  */
> -  vec<bitmap> conflicts;
> -};
> -
>  /* Return an empty new conflict graph for SIZE elements.  */
>
>  static inline ssa_conflicts *
> @@ -1800,7 +1859,7 @@ coalesce_ssa_name (void)
>    if (dump_file && (dump_flags & TDF_DETAILS))
>      ssa_conflicts_dump (dump_file, graph);
>
> -  sort_coalesce_list (cl);
> +  sort_coalesce_list (cl, graph, map);
>
>    if (dump_file && (dump_flags & TDF_DETAILS))
>      {
>

Reply via email to