Re: Propagate profile counts during switch expansion

Jan Hubicka Thu, 04 Oct 2012 06:20:01 -0700

> Hi,
>  This patch propagates the profile counts during RTL expansion. In
> many cases, there is no way to determine the exact count of an edge
> generated during the expansion. So this patch uses some simple
> heuristics to estimate the edge counts but ensures that the counts of
> the basic blocks corresponding to the cases are (nearly the) same as
> at the gimple level.
> 
> Bootstrapped and profile-bootstrapped on an x86_64/linux machine. OK for 
> trunk?
> Index: gcc/expr.c
> ===================================================================
> --- gcc/expr.c (revision 191879)
> +++ gcc/expr.c (working copy)
> @@ -154,7 +154,7 @@ static rtx do_store_flag (sepops, rtx, enum machin
>  #ifdef PUSH_ROUNDING
>  static void emit_single_push_insn (enum machine_mode, rtx, tree);
>  #endif
> -static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx);
> +static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx, int);
>  static rtx const_vector_from_tree (tree);
>  static void write_complex_part (rtx, rtx, bool);
> 
> @@ -10894,7 +10894,7 @@ try_casesi (tree index_type, tree index_expr, tree
> 
>  static void
>  do_tablejump (rtx index, enum machine_mode mode, rtx range, rtx table_label,
> -      rtx default_label)
> +      rtx default_label, int default_probability)


Please document default_probability.
>  {
>    rtx temp, vector;
> 
> @@ -10910,9 +10910,17 @@ do_tablejump (rtx index, enum machine_mode mode, r
>       the maximum value of the range.  */
> 
>    if (default_label)
> -    emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, mode, 1,
> -     default_label);
> +    {
> +      emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, mode, 1,
> +       default_label);
> +      if (default_probability != -1)
> +        {
> +          rtx jump_insn = get_last_insn();
> +          add_reg_note (jump_insn, REG_BR_PROB, GEN_INT 
> (default_probability));
> +        }
> +    }

dojump already does this kind of logic, but it is bit more cureful:
      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
                               if_true_label);
      if (prob != -1 && profile_status != PROFILE_ABSENT)
        { 
          for (last = NEXT_INSN (last);
               last && NEXT_INSN (last);
               last = NEXT_INSN (last))
            if (JUMP_P (last))
              break;
          if (last
              && JUMP_P (last)
              && ! NEXT_INSN (last)
              && any_condjump_p (last))
            { 
              gcc_assert (!find_reg_note (last, REG_BR_PROB, 0));
              add_reg_note (last, REG_BR_PROB, GEN_INT (prob));
            }
        }

What about making emit_cmp_and_jump_insns taking the probability argument
and moving the code above inside?  Most of other places need updating to
propagate probabilities.

(compare_and_jump_seq in loop-unswitch probably also can be updated)
> @@ -10954,7 +10962,7 @@ do_tablejump (rtx index, enum machine_mode mode, r
> 
>  int
>  try_tablejump (tree index_type, tree index_expr, tree minval, tree range,
> -       rtx table_label, rtx default_label)
> +       rtx table_label, rtx default_label, int default_probability)

Simiarly here.
> Index: gcc/cfgbuild.c
> ===================================================================
> --- gcc/cfgbuild.c (revision 191879)
> +++ gcc/cfgbuild.c (working copy)
> @@ -533,6 +533,23 @@ find_bb_boundaries (basic_block bb)
>      purge_dead_tablejump_edges (bb, table);
>  }
> 
> +/* If there is at least one edge in EDGES with a non-zero count, then
> +   compute probabilities based on the existing counts.  */
> +
> +static bool
> +gen_probabilities_from_existing_counts ( VEC(edge,gc) *edges) {
> +  edge e;
> +  edge_iterator ei;
> +  gcov_type count_sum = 0;
> +  FOR_EACH_EDGE(e, ei, edges)
> +    count_sum += e->count;
> +  if (count_sum == 0)
> +    return false;
> +  FOR_EACH_EDGE(e, ei, edges)
> +    e->probability = e->count * REG_BR_PROB_BASE / count_sum;
> +  return true;
> +}
> +
>  /*  Assume that frequency of basic block B is known.  Compute frequencies
>      and probabilities of outgoing edges.  */
> 
> @@ -560,7 +577,6 @@ compute_outgoing_frequencies (basic_block b)
>    return;
>   }
>      }
> -
>    if (single_succ_p (b))
>      {
>        e = single_succ_edge (b);
> @@ -568,7 +584,10 @@ compute_outgoing_frequencies (basic_block b)
>        e->count = b->count;
>        return;
>      }
> -  guess_outgoing_edge_probabilities (b);
> +  else if (!gen_probabilities_from_existing_counts (b->succs)){
> +    /* All outgoing edges of B have zero count. Guess probabilities.  */
> +    guess_outgoing_edge_probabilities (b);
> +  }

Hmm, I do not quite follow logic here.  
basic block B is one of many basic blocks that the original BB was split from.
It is possible that B may have some of original edges, but there may be new 
ones.
How you can guess the outgoing probabilitie shere.  Do you have an example?

Also gen_probabilities_from_existing_counts could probably also work based
on original edge frequencies.
> @@ -1664,7 +1668,7 @@ do_jump_if_equal (enum machine_mode mode, rtx op0,
> 
>  static struct case_node *
>  add_case_node (struct case_node *head, tree low, tree high,
> -               tree label, alloc_pool case_node_pool)
> +               tree label, gcov_type count, alloc_pool case_node_pool)

Please document  COUNT here.
>  {
>    struct case_node *r;
> 
> @@ -1677,6 +1681,8 @@ add_case_node (struct case_node *head, tree low, t
>    r->high = high;
>    r->code_label = label;
>    r->parent = r->left = NULL;
> +  r->count = count;
> +  r->subtree_count = count;
>    r->right = head;
>    return r;
>  }

.. and here
> @@ -1803,7 +1809,8 @@ expand_switch_as_decision_tree_p (tree range,
> 
>  static void
>  emit_case_decision_tree (tree index_expr, tree index_type,
> - struct case_node *case_list, rtx default_label)
> + struct case_node *case_list, rtx default_label,
> +                         gcov_type default_count)
>  {
>    rtx index = expand_normal (index_expr);
> 
> @@ -1839,15 +1846,29 @@ emit_case_decision_tree (tree index_expr, tree ind
>        dump_case_nodes (dump_file, case_list, indent_step, 0);
>      }
> 
> -  emit_case_nodes (index, case_list, default_label, index_type);
> +  emit_case_nodes (index, case_list, default_label, default_count, 
> index_type);
>    if (default_label)
>      emit_jump (default_label);
>  }
> 

And document functio nhere.

> +static gcov_type
> +get_outgoing_edge_counts (basic_block bb)
> +{
> +  edge e;
> +  edge_iterator ei;
> +  gcov_type count_sum = 0;
> +  FOR_EACH_EDGE(e, ei, bb->succs)
> +    count_sum += e->count;
> +  return count_sum;
> +}
> +
> +#define case_probability(x, y) \
> +    ((y) ? (gcc_assert (x <= y), (x) * REG_BR_PROB_BASE  / (y))  : -1)

You want to use RDIV here for better rounding.  I would make this an inline
functions these days.
> +
>  /* Generate a dispatch tabler, switching on INDEX_EXPR and jumping to
>     one of the labels in CASE_LIST or to the DEFAULT_LABEL.
>     MINVAL, MAXVAL, and RANGE are the extrema and range of the case
> -   labels in CASE_LIST.
> +   labels in CASE_LIST. STMT_BB is the basic block containing the statement.
> 
>     First, a jump insn is emitted.  First we try "casesi".  If that
>     fails, try "tablejump".   A target *must* have one of them (or both).
> @@ -1860,19 +1881,23 @@ emit_case_decision_tree (tree index_expr, tree ind
>  static void
>  emit_case_dispatch_table (tree index_expr, tree index_type,
>    struct case_node *case_list, rtx default_label,
> -  tree minval, tree maxval, tree range)
> +  tree minval, tree maxval, tree range,
> +                          basic_block stmt_bb)
>  {
>    int i, ncases;
>    struct case_node *n;
>    rtx *labelvec;
>    rtx fallback_label = label_rtx (case_list->code_label);
>    rtx table_label = gen_label_rtx ();
> +  bool has_gaps = false;
> +  edge default_edge = EDGE_SUCC(stmt_bb, 0);
> +  gcov_type default_count = default_edge->count;
> +  gcov_type count = get_outgoing_edge_counts (stmt_bb);
> +  bool try_with_tablejump = false;

Here, I think you want to first decide whether you base expansion on counts
(i.e. any counts involved is non-0) or probabilities.  We really want to 
maintain
the profile up-to-date even with guessed profile.  The inconsistencies
propagate across CFG and easilly confuse later optimizers.
Everywhere you are using count comming from the original statement, you can 
also use
the probability.

The newly produced control flow will be optimized and the edges frequencies will
be propagated along.

> +
> +  default_edge->count = default_count;
> +  if (count)
> +    {
> +      edge e;
> +      edge_iterator ei;
> +      FOR_EACH_EDGE (e, ei, stmt_bb->succs)
> +        e->probability = e->count * REG_BR_PROB_BASE / count;
> +    }

Hmm, this updates origina BB containing the switch statement?  Of course,
modulo roundoff errors, this should hold.  I wonder where did you got the
diferences and why do you need this?  You are going to output new control flow
and find_many_sub_basic_blocks will recompute all the counts/frequencies inside
anyway?
Also you want to use RDIV here.
> @@ -2358,6 +2433,20 @@ node_is_bounded (case_node_ptr node, tree index_ty
>    && node_has_high_bound (node, index_type));
>  }
> 
> +
> +/* Attach a REG_BR_PROB note to the last created RTX instruction if
> +   PROBABILITY is not -1.  */
> +
> +static void
> +add_prob_note_to_last_insn(int probability)
> +{
> +  if (probability != -1)
> +    {
> +      rtx jump_insn = get_last_insn();
> +      add_reg_note (jump_insn, REG_BR_PROB, GEN_INT (probability));
> +    }
> +}

Well, I am not sure this is safe thing to do since you do not really
know what the last instruction is.  In any case you want to test
it is an conditional jump instruction as in the code I quoted
above.

> @@ -2430,7 +2523,11 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>   unsignedp),
>         GT, NULL_RTX, mode, unsignedp,
>         label_rtx (node->right->code_label));
> -      emit_case_nodes (index, node->left, default_label, index_type);
> +              probability = case_probability (node->right->count,
> +                                              subtree_count + default_count);
> +              add_prob_note_to_last_insn (probability);
> +      emit_case_nodes (index, node->left, default_label, default_count,
> +                               index_type);

Hmm, here you seem to be distributing the probabilities of the counts reached.
What happens in the case when the edge probability needs to be distributed 
across
nodes of the decision tree. I.e. in
t(int a)
{
  switch (a)
   { 
     case 100:
     case 200:
     case 300:
     case 400:
     case 500:
     case 600:
     case 700:
     case 800:
     case 900:
       t();
     case 101:
     case 202:
     case 303:
     case 404:
     case 505:
     case 606:
     case 707:
     case 808:
     case 909:
       q();
   }
}

>      }
> 
>    else if (node_is_bounded (node->left, index_type))
> @@ -2442,7 +2539,10 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>   unsignedp),
>         LT, NULL_RTX, mode, unsignedp,
>         label_rtx (node->left->code_label));
> -      emit_case_nodes (index, node->right, default_label, index_type);
> +              probability = case_probability (node->left->count,
> +                                              subtree_count + default_count);
> +              add_prob_note_to_last_insn (probability);

Here, and apparently in all uses of add_prob_note_to_last_insn, you want to use
the new parameter of emit_cmp_and_jump_insn.

Otherwise the patch seems to make sense.  Thanks for looking into this issue.

Honza

Re: Propagate profile counts during switch expansion

Reply via email to