On Mon, Oct 22, 2012 at 8:31 PM, Easwaran Raman <era...@google.com> wrote:
> On Mon, Oct 22, 2012 at 12:59 AM, Richard Biener
> <richard.guent...@gmail.com> wrote:
>> On Fri, Oct 19, 2012 at 12:36 AM, Easwaran Raman <era...@google.com> wrote:
>>> Hi,
>>>
>>> During expression reassociation, statements are conservatively moved
>>> downwards to ensure that dependences are correctly satisfied after
>>> reassociation. This could lead to lengthening of live ranges. This
>>> patch moves statements only to the extent necessary. Bootstraps with no
>>> test regressions on x86_64/linux. OK for trunk?
>>>
>>> Thanks,
>>> Easwaran
>>>
>>> 2012-10-18  Easwaran Raman  <era...@google.com>
>>>
>>> * tree-ssa-reassoc.c (assign_uids): New function.
>>> (assign_uids_in_relevant_bbs): Likewise.
>>> (ensure_ops_are_available): Likewise.
>>> (rewrite_expr_tree): Do not move statements beyond what is
>>> necessary. Remove call to swap_ops_for_binary_stmt...
>>> (reassociate_bb): ... and move it here.
>>>
>>>
>>>
>>> Index: gcc/tree-ssa-reassoc.c
>>> ===================================================================
>>> --- gcc/tree-ssa-reassoc.c (revision 192487)
>>> +++ gcc/tree-ssa-reassoc.c (working copy)
>>> @@ -2250,6 +2250,128 @@ swap_ops_for_binary_stmt (VEC(operand_entry_t, hea
>>>      }
>>>  }
>>>
>>> +/* Assign UIDs to statements in basic block BB.  */
>>> +
>>> +static void
>>> +assign_uids (basic_block bb)
>>> +{
>>> +  unsigned uid = 0;
>>> +  gimple_stmt_iterator gsi;
>>> +  /* First assign uids to phis.  */
>>> +  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>> +    {
>>> +      gimple stmt = gsi_stmt (gsi);
>>> +      gimple_set_uid (stmt, uid++);
>>> +    }
>>> +
>>> +  /* Then assign uids to stmts.  */
>>> +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>> +    {
>>> +      gimple stmt = gsi_stmt (gsi);
>>> +      gimple_set_uid (stmt, uid++);
>>> +    }
>>> +}
>>> +
>>> +/* For each operand in OPS, find the basic block that contains the statement
>>> +   which defines the operand. For all such basic blocks, assign UIDs.  */
>>> +
>>> +static void
>>> +assign_uids_in_relevant_bbs (VEC(operand_entry_t, heap) * ops)
>>> +{
>>> +  operand_entry_t oe;
>>> +  int i;
>>> +  struct pointer_set_t *seen_bbs = pointer_set_create ();
>>> +
>>> +  for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
>>> +    {
>>> +      gimple def_stmt;
>>> +      basic_block bb;
>>> +      if (TREE_CODE (oe->op) != SSA_NAME)
>>> +        continue;
>>> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
>>> +      bb = gimple_bb (def_stmt);
>>> +      if (!pointer_set_contains (seen_bbs, bb))
>>> +        {
>>> +          assign_uids (bb);
>>> +          pointer_set_insert (seen_bbs, bb);
>>> +        }
>>> +    }
>>> +  pointer_set_destroy (seen_bbs);
>>> +}
>>
>> Please assign UIDs once using the existing renumber_gimple_stmt_uids ().
>> You seem to call the above multiple times and thus do work bigger than
>> O(number of basic blocks).
>
> The reason I call the above multiple times is that gsi_move_before
> might get called between two calls to the above. For instance, after
> rewrite_expr_tree is called once, the following sequence of calls
> could happen:
> reassociate_bb -> linearize_expr_tree -> linearize_expr ->
> gsi_move_before.  So it is not sufficient to call
> renumber_gimple_stmt_uids once per do_reassoc. Or do you want me to
> use renumber_gimple_stmt_uids_in_blocks instead of
> assign_uids_in_relevant_bbs?

It's sufficient to call it once if you conservatively update UIDs at stmt
move / insert time (assign the same UID as the stmt before/after).
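
Something like this at each move site would do (untested sketch, variable
names as in the existing rewrite_expr_tree code, just for illustration):

  /* Give the statement we are about to move the UID of the statement it
     will be inserted before, so the UIDs stay conservatively correct
     without renumbering the whole function.  */
  gimple_set_uid (gsi_stmt (gsirhs1), gimple_uid (gsi_stmt (gsinow)));
  gsi_move_before (&gsirhs1, &gsinow);

Then a single renumber_gimple_stmt_uids () (or
renumber_gimple_stmt_uids_in_blocks ()) at the start of the pass is enough.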

>
>
>>> +/* Ensure that operands in the OPS vector starting from the OPINDEXth entry are live
>>> +   at STMT.  This is accomplished by moving STMT if needed.  */
>>> +
>>> +static void
>>> +ensure_ops_are_available (gimple stmt, VEC(operand_entry_t, heap) * ops, int opindex)
>>> +{
>>> +  int i;
>>> +  int len = VEC_length (operand_entry_t, ops);
>>> +  gimple insert_stmt = stmt;
>>> +  basic_block insert_bb = gimple_bb (stmt);
>>> +  gimple_stmt_iterator gsi_insert, gsistmt;
>>> +  for (i = opindex; i < len; i++)
>>> +    {
>>
>> Likewise you call this for each call to rewrite_expr_tree, so it seems to me
>> this is quadratic in the number of ops in the op vector.
>
> The call to ensure_ops_are_available inside rewrite_expr_tree is
> guarded by if (!moved) and I set moved = true there to ensure that
> ensure_ops_are_available is called only once per reassociation of an
> expression tree.

It would be a lot easier to understand this function if you called it once
after rewrite_expr_tree has finished.
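
I.e. in reassociate_bb, roughly (untested, only to show the placement;
ensure_ops_are_available would then have to walk the rhs1 chain itself
rather than being handed each intermediate statement):

  rewrite_expr_tree (stmt, 0, ops, false);
  /* All operands of the tree are materialized now; fix up the
     statement order in one pass.  */
  ensure_ops_are_available (stmt, ops, 0);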

>>
>> Why make this all so complicated?  It seems to me that we should
>> fixup stmt order only after the whole ops vector has been materialized.
>
>
>>
>>> +      operand_entry_t oe = VEC_index (operand_entry_t, ops, i);
>>> +      gimple def_stmt;
>>> +      basic_block def_bb;
>>> +      /* Ignore constants and operands with default definitions.  */
>>> +      if (TREE_CODE (oe->op) != SSA_NAME
>>> +          || SSA_NAME_IS_DEFAULT_DEF (oe->op))
>>> +        continue;
>>> +      def_stmt = SSA_NAME_DEF_STMT (oe->op);
>>> +      def_bb = gimple_bb (def_stmt);
>>> +      if (def_bb != insert_bb
>>> +          && !dominated_by_p (CDI_DOMINATORS, insert_bb, def_bb))
>>> +        {
>>> +          insert_bb = def_bb;
>>> +          insert_stmt = def_stmt;
>>> +        }
>>> +      else if (def_bb == insert_bb
>>> +               && gimple_uid (insert_stmt) < gimple_uid (def_stmt))
>>> +        insert_stmt = def_stmt;
>>> +    }
>>> +  if (insert_stmt == stmt)
>>> +    return;
>>> +  gsistmt = gsi_for_stmt (stmt);
>>> +  /* If INSERT_STMT is a phi node, then do not insert just after that statement.
>>> +     Instead, find the first non-label gimple statement in BB and insert before
>>> +     that.  */
>>> +  if (gimple_code (insert_stmt) == GIMPLE_PHI)
>>> +    {
>>> +      gsi_insert = gsi_after_labels (insert_bb);
>>> +      gsi_move_before (&gsistmt, &gsi_insert);
>>> +    }
>>> +  /* Statements marked for throw can not be in the middle of a basic block. So
>>> +     we can not insert a statement (not marked for throw) immediately after.  */
>>> +  else if (lookup_stmt_eh_lp (insert_stmt) > 0
>>
>> that's already performed by stmt_can_throw_internal
> Ok. I was hit by the "statement marked for throw in middle of block"
> error in verify_gimple_in_cfg  and simply copied the sequence of
> checks that led to that.
>
>>
>>> +           && stmt_can_throw_internal (insert_stmt))
>>
>> But all this should be a non-issue as re-assoc should never assign an ops
>> vector entry for such stmts (but it could have leafs defined by such stmts).
>> If you only ever move definitions down, like the present code does, caring
>> about leafs should not be necessary.
>
> I added this as I got an ICE in verify_gimple_in_cfg. So it seems like
> such statements get assigned to the ops vector.

That would be a bug.  See how linearize_expr_tree checks whether
the defining stmt of an op may throw.  That is, it _does_ add the
SSA def to the vector but does not recurse on its defining stmt operands.
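
The relevant bit in linearize_expr_tree is (quoting from memory, please
double-check against trunk):

  binrhsdef = SSA_NAME_DEF_STMT (binrhs);
  binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
                     && !stmt_could_throw_p (binrhsdef));

i.e. a throwing definition only ever ends up as a leaf added via
add_to_ops_vec; its operands are never recursed into.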

Richard.

> Thanks,
> Easwaran
>>
>> Richard.
>>
>>> +    {
>>> +      edge e, succ_edge = NULL;
>>> +      edge_iterator ei;
>>> +
>>> +      /* There should be exactly one normal edge out of the basic block.  */
>>> +      FOR_EACH_EDGE (e, ei, insert_bb->succs)
>>> +        {
>>> +          if (!(e->flags & EDGE_COMPLEX))
>>> +            {
>>> +              gcc_assert (succ_edge == NULL);
>>> +              succ_edge = e;
>>> +            }
>>> +        }
>>> +      /* Insert STMT at the beginning of the successor basic block.  */
>>> +      insert_bb = succ_edge->dest;
>>> +      gsi_insert = gsi_after_labels (insert_bb);
>>> +      gsi_move_before (&gsistmt, &gsi_insert);
>>> +    }
>>> +  else
>>> +    {
>>> +      gsi_insert = gsi_for_stmt (insert_stmt);
>>> +      gsi_move_after (&gsistmt, &gsi_insert);
>>> +    }
>>> +
>>> +}
>>> +
>>>  /* Recursively rewrite our linearized statements so that the operators
>>>     match those in OPS[OPINDEX], putting the computation in rank
>>>     order.  */
>>> @@ -2262,11 +2384,6 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>    tree rhs2 = gimple_assign_rhs2 (stmt);
>>>    operand_entry_t oe;
>>>
>>> -  /* If we have three operands left, then we want to make sure the ones
>>> -     that get the double binary op are chosen wisely.  */
>>> -  if (opindex + 3 == VEC_length (operand_entry_t, ops))
>>> -    swap_ops_for_binary_stmt (ops, opindex, stmt);
>>> -
>>>    /* The final recursion case for this function is that you have
>>>       exactly two operations left.
>>>       If we had one exactly one op in the entire list to start with, we
>>> @@ -2312,19 +2429,17 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>>>      {
>>>        if (!moved)
>>>   {
>>> -  gimple_stmt_iterator gsinow, gsirhs1;
>>> -  gimple stmt1 = stmt, stmt2;
>>> -  unsigned int count;
>>> +  gimple stmt1 = stmt;
>>> +  unsigned int count, i = 1;
>>>
>>> -  gsinow = gsi_for_stmt (stmt);
>>>    count = VEC_length (operand_entry_t, ops) - opindex - 2;
>>> -  while (count-- != 0)
>>> +  while (i <= count)
>>>      {
>>> -      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>>> -      gsirhs1 = gsi_for_stmt (stmt2);
>>> -      gsi_move_before (&gsirhs1, &gsinow);
>>> -      gsi_prev (&gsinow);
>>> -      stmt1 = stmt2;
>>> +      stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
>>> +      /* Ensure that STMT1 is moved to a place where all operands in
>>> +         OPS[opindex + i...] are available.  */
>>> +      ensure_ops_are_available (stmt1, ops, opindex + i);
>>> +      i++;
>>>      }
>>>    moved = true;
>>>   }
>>> @@ -3542,8 +3657,18 @@ reassociate_bb (basic_block bb)
>>>        && VEC_length (operand_entry_t, ops) > 3)
>>>      rewrite_expr_tree_parallel (stmt, width, ops);
>>>    else
>>> -    rewrite_expr_tree (stmt, 0, ops, false);
>>> +                    {
>>> +                      /* When there are three operands left, we want
>>> +                         to make sure the ones that get the double
>>> +                         binary op are chosen wisely.  */
>>> +                      int len = VEC_length (operand_entry_t, ops);
>>> +                      if (len >= 3)
>>> +                        swap_ops_for_binary_stmt (ops, len - 3, stmt);
>>>
>>> +                      assign_uids_in_relevant_bbs (ops);
>>> +                      rewrite_expr_tree (stmt, 0, ops, false);
>>> +                    }
>>> +
>>>    /* If we combined some repeated factors into a
>>>       __builtin_powi call, multiply that result by the
>>>       reassociated operands.  */
