On Wed, 22 Sep 2021, Ilya Leoshkevich wrote:

> PR tree-optimization/49749 introduced code that shortens dependency
> chains containing loop accumulators by placing them last on operand
> lists of associative operations.
> 
> 456.hmmer benchmark on s390 could benefit from this, however, the code
> that needs it modifies loop accumulator before using it, and since only
> so-called loop-carried phis are are treated as loop accumulators, the
> code in the present form doesn't really help.   According to Bill
> Schmidt - the original author - such a conservative approach was chosen
> so as to avoid unnecessarily swapping operands, which might cause
> unpredictable effects.  However, giving special treatment to forms of
> loop accumulators is acceptable.
> 
> The definition of loop-carried phi is: it's a single-use phi, which is
> used in the same innermost loop it's defined in, at least one argument
> of which is defined in the same innermost loop as the phi itself.
> Given this, it seems natural to treat single uses of such phis as phis
> themselves.
> 
> gcc/ChangeLog:
> 
>       * tree-ssa-reassoc.c (biased_names): New global.
>       (propagate_bias_p): New function.
>       (loop_carried_phi): Remove.
>       (propagate_rank): Propagate bias along single uses.
>       (get_rank): Update biased_names when needed.
> ---
>  gcc/tree-ssa-reassoc.c | 97 ++++++++++++++++++++++++++++--------------
>  1 file changed, 64 insertions(+), 33 deletions(-)
> 
> diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
> index 420c14e8cf5..2f7a8882aac 100644
> --- a/gcc/tree-ssa-reassoc.c
> +++ b/gcc/tree-ssa-reassoc.c
> @@ -211,6 +211,10 @@ static int64_t *bb_rank;
>  /* Operand->rank hashtable.  */
>  static hash_map<tree, int64_t> *operand_rank;
>  
> +/* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
> +   biased.  */
> +static auto_bitmap biased_names;
> +
>  /* Vector of SSA_NAMEs on which after reassociate_bb is done with
>     all basic blocks the CFG should be adjusted - basic blocks
>     split right after that SSA_NAME's definition statement and before
> @@ -256,6 +260,50 @@ reassoc_remove_stmt (gimple_stmt_iterator *gsi)
>     the rank difference between two blocks.  */
>  #define PHI_LOOP_BIAS (1 << 15)
>  
> +/* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
> +   operands to the STMT's left-hand side.  The goal is to preserve bias in 
> code
> +   like this:
> +
> +     x_1 = phi(x_0, x_2)
> +     a = x_1 | 1
> +     b = a ^ 2
> +     .MEM = b
> +     c = b + d
> +     x_2 = c + e
> +
> +   That is, we need to preserve bias along single-use chains originating from
> +   loop-carried phis.  Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
> +   uses, because only they participate in rank propagation.  */
> +static bool
> +propagate_bias_p (gimple *stmt)
> +{
> +  use_operand_p use;
> +  imm_use_iterator use_iter;
> +  gimple *single_use_stmt = NULL;
> +
> +  FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
> +    {
> +      gimple *current_use_stmt = USE_STMT (use);
> +
> +      if (is_gimple_assign (current_use_stmt)
> +       && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
> +     {
> +       if (single_use_stmt != NULL)

what if single_use_stmt == current_use_stmt?  We might have two
uses on a stmt after all - should that still be biased?  I guess not
and thus the check is correct?

> +         return false;
> +       single_use_stmt = current_use_stmt;
> +     }
> +    }
> +
> +  if (single_use_stmt == NULL)
> +    return false;
> +
> +  if (gimple_bb (stmt)->loop_father
> +      != gimple_bb (single_use_stmt)->loop_father)
> +    return false;
> +
> +  return true;
> +}
> +
>  /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
>     an innermost loop, and the phi has only a single use which is inside
>     the loop, then the rank is the block rank of the loop latch plus an
> @@ -313,46 +361,23 @@ phi_rank (gimple *stmt)
>    return bb_rank[bb->index];
>  }
>  
> -/* If EXP is an SSA_NAME defined by a PHI statement that represents a
> -   loop-carried dependence of an innermost loop, return TRUE; else
> -   return FALSE.  */
> -static bool
> -loop_carried_phi (tree exp)
> -{
> -  gimple *phi_stmt;
> -  int64_t block_rank;
> -
> -  if (TREE_CODE (exp) != SSA_NAME
> -      || SSA_NAME_IS_DEFAULT_DEF (exp))
> -    return false;
> -
> -  phi_stmt = SSA_NAME_DEF_STMT (exp);
> -
> -  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
> -    return false;
> -
> -  /* Non-loop-carried phis have block rank.  Loop-carried phis have
> -     an additional bias added in.  If this phi doesn't have block rank,
> -     it's biased and should not be propagated.  */
> -  block_rank = bb_rank[gimple_bb (phi_stmt)->index];
> -
> -  if (phi_rank (phi_stmt) != block_rank)
> -    return true;
> -
> -  return false;
> -}
> -
>  /* Return the maximum of RANK and the rank that should be propagated
>     from expression OP.  For most operands, this is just the rank of OP.
>     For loop-carried phis, the value is zero to avoid undoing the bias
>     in favor of the phi.  */
>  static int64_t
> -propagate_rank (int64_t rank, tree op)
> +propagate_rank (int64_t rank, tree op, gimple *stmt, bool *bias_p)
>  {
>    int64_t op_rank;
>  
> -  if (loop_carried_phi (op))
> -    return rank;
> +  if (TREE_CODE (op) == SSA_NAME
> +      && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
> +    {
> +      if (propagate_bias_p (stmt))
> +     *bias_p = true;
> +      else
> +     return rank;
> +    }
>  
>    op_rank = get_rank (op);
>  
> @@ -440,6 +465,8 @@ get_rank (tree e)
>  
>        else
>       {
> +       bool bias_p = false;
> +
>         /* Otherwise, find the maximum rank for the operands.  As an
>            exception, remove the bias from loop-carried phis when propagating
>            the rank so that dependent operations are not also biased.  */
> @@ -448,9 +475,12 @@ get_rank (tree e)
>            thus have rank 0.  */
>         rank = 0;
>         FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
> -         rank = propagate_rank (rank, op);
> +         rank = propagate_rank (rank, op, stmt, &bias_p);

It looks like if (propagate_bias_p (stmt)) is loop invariant here
and so when we inline the head this should simplify, avoiding
the new parameters to propagate_rank?

Otherwise looks good to me.

Richard.

Reply via email to