Re: [RFC] Converting end of loop computations to MIN_EXPRs.

Ramana Radhakrishnan Mon, 30 Apr 2012 23:37:19 -0700

Sorry about the delayed response, I've been away for some time.

>
> I don't exactly understand why the general transform is not advisable.
> We already synthesize min/max operations.



>
> Can you elaborate on why you think that better code might be generated
> when not doing this transform?

The reason why I wasn't happy was because of the code we ended up
generating in this case for ARM comparing the simple examples showed
the following difference - while I'm pretty sure I can massage the
backend to generate the right form in this case with splitters I
probably didn't realize this when I wrote the mail. Given this, I
wonder if it is worth in general doing this transformation in a fold
type operation rather than restricting ourselves only to invariant
operands ?


The canonical example is as below :


#define min(x, y) ((x) < (y)) ? (x) : (y)
int foo (int i, int x ,int y)
{
// return ( i < x) && (i < y);
 return i < (min (x, y));
}


Case with min_expr:

        cmp     r2, r1  @ 8     *arm_smin_insn/1        [length = 8]
        movge   r2, r1
        cmp     r2, r0  @ 23    *arm_cmpsi_insn/3       [length = 4]
        movle   r0, #0  @ 24    *p *arm_movsi_insn/2    [length = 4]
        movgt   r0, #1  @ 25    *p *arm_movsi_insn/2    [length = 4]
        bx      lr      @ 28    *arm_return     [length = 12]


This might well be .

      cmp       r2, r0
      cmpge  r1, r0
     movle    r0, #0
     movgt    r0, #1
     bx           lr

Case without min_expr:

        cmp     r0, r2  @ 28    *cmp_and/6      [length = 8]
        cmplt   r0, r1
        movge   r0, #0  @ 29    *mov_scc        [length = 8]
        movlt   r0, #1
        bx      lr      @ 32    *arm_return     [length = 12]



>
>> #define min(x,y) ((x) <= (y) ? (x) : (y))
>>
>> void foo (int x, int y, int *  a, int * b, int *c)
>> {
>>  int i;
>>
>>  for (i = 0;
>>       i < x && i < y;
>>       /* i < min (x, y); */
>>       i++)
>>    a[i] = b[i] * c[i];
>>
>> }
>>
>> The patch below deals with this case and I'm guessing that it could
>> also handle more of the comparison cases and come up with more
>> intelligent choices and should be made quite a lot more robust than
>> what it is right now.
>
> Yes.  At least if you have i < 5 && i < y we canonicalize it to
> i <= 4 && i < y, so your pattern matching would fail.

Of-course considering overflow semantics you could transform this to
i < min (x +1, y) where the original condition was i <= x && i < y.

Thinking about it , it's probably right to state that

i op1 X && i op2 Y => i op min (X1, Y1)

when op1 and op2 are identical or according to the table below :

op1     op2             op            X1           Y1
  <         <=               <=            X + 1        Y
  >         >=               >                X            Y + 1
 < =        <                <=             X             Y + 1
  >=         >               >               X + 1         Y


Other than being careful about overflow semantics the second table
is probably worthwhile looking at -

>
> Btw, the canonical case this happens in is probably
>
>   for (i = 0; i < n; ++i)
>     for (j = 0; j < m && j < i; ++j)
>       a[i][j] = ...
>
> thus iterating over the lower/upper triangular part of a non-square matrix
> (including or not including the diagonal, thus also j < m && j <= i

Ok thanks - fair enough .


Ramana

>
> Richard.
>
>> regards,
>> Ramana
>>
>>
>>
>> diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
>> index ce5eb20..a529536 100644
>> --- a/gcc/tree-ssa-loop-im.c
>> +++ b/gcc/tree-ssa-loop-im.c
>> @@ -563,6 +563,7 @@ stmt_cost (gimple stmt)
>>
>>   switch (gimple_assign_rhs_code (stmt))
>>     {
>> +    case MIN_EXPR:
>>     case MULT_EXPR:
>>     case WIDEN_MULT_EXPR:
>>     case WIDEN_MULT_PLUS_EXPR:
>> @@ -971,6 +972,124 @@ rewrite_reciprocal (gimple_stmt_iterator *bsi)
>>   return stmt1;
>>  }
>>
>> +/* We look for a sequence that is :
>> +   def_stmt1  : x = a < b
>> +   def_stmt2  : y = a < c
>> +   stmt: z = x & y
>> +   use_stmt_cond: if ( z != 0)
>> +
>> +   where b, c are loop invariant .
>> +
>> +   In which case we might as well replace this by :
>> +
>> +   t = min (b, c)
>> +   if ( a < t )
>> +*/
>> +
>> +static gimple
>> +rewrite_min_test (gimple_stmt_iterator *bsi)
>> +{
>> +  gimple stmt, def_stmt_x, def_stmt_y, use_stmt_cond, stmt1;
>> +  tree x, y, z, a, b, c, var, t, name;
>> +  use_operand_p use;
>> +  bool is_lhs_of_comparison = false;
>> +
>> +  stmt = gsi_stmt (*bsi);
>> +  z = gimple_assign_lhs (stmt);
>> +
>> +  /* We start by looking at whether x is used in the
>> +     right set of conditions.  */
>> +  if (TREE_CODE (z) != SSA_NAME
>> +      || !single_imm_use (z, &use, &use_stmt_cond)
>> +      || gimple_code (use_stmt_cond) != GIMPLE_COND)
>> +    return stmt;
>> +
>> +  x = gimple_assign_rhs1 (stmt);
>> +  y = gimple_assign_rhs2 (stmt);
>> +
>> +  if (TREE_CODE (x) != SSA_NAME
>> +      || TREE_CODE (y) != SSA_NAME)
>> +    return stmt;
>> +
>> +  def_stmt_x = SSA_NAME_DEF_STMT (x);
>> +  def_stmt_y = SSA_NAME_DEF_STMT (y);
>> +
>> +  /* def_stmt_x and def_stmt_y should be of the
>> +     form
>> +
>> +     x = a cmp b
>> +     y = a cmp c
>> +
>> +     or
>> +
>> +     x = b cmp a
>> +     y = c cmp a
>> +  */
>> +  if (!is_gimple_assign (def_stmt_x)
>> +      || !is_gimple_assign (def_stmt_y)
>> +      || (gimple_assign_rhs_code (def_stmt_x)
>> +         != gimple_assign_rhs_code (def_stmt_y)))
>> +    return stmt;
>> +
>> +  if (gimple_assign_rhs1 (def_stmt_x) == gimple_assign_rhs1 (def_stmt_y)
>> +      && (gimple_assign_rhs_code (def_stmt_x) == LT_EXPR
>> +         || gimple_assign_rhs_code (def_stmt_x) == LE_EXPR))
>> +    {
>> +      a = gimple_assign_rhs1 (def_stmt_x);
>> +      b = gimple_assign_rhs2 (def_stmt_x);
>> +      c = gimple_assign_rhs2 (def_stmt_y);
>> +      is_lhs_of_comparison = true;
>> +    }
>> +  else
>> +    {
>> +      if (gimple_assign_rhs2 (def_stmt_x) == gimple_assign_rhs2 (def_stmt_y)
>> +         && (gimple_assign_rhs_code (def_stmt_x) == GT_EXPR
>> +             || gimple_assign_rhs_code (def_stmt_x) == GE_EXPR))
>> +       {
>> +         a = gimple_assign_rhs2 (def_stmt_x);
>> +         b = gimple_assign_rhs1 (def_stmt_x);
>> +         c = gimple_assign_rhs1 (def_stmt_y);
>> +       }
>> +      else
>> +       return stmt;
>> +    }
>> +
>> +  if (outermost_invariant_loop (b, loop_containing_stmt (def_stmt_x)) != 
>> NULL
>> +      && outermost_invariant_loop (c, loop_containing_stmt
>> (def_stmt_y)) != NULL)
>> +
>> +    {
>> +      if (dump_file)
>> +       fprintf (dump_file, "Found a potential transformation to min\n");
>> +
>> +      /* mintmp = min (b , c).  */
>> +
>> +      var = create_tmp_var (TREE_TYPE (b), "mintmp");
>> +      add_referenced_var (var);
>> +      t = fold_build2 (MIN_EXPR, TREE_TYPE (b), b, c);
>> +      stmt1 = gimple_build_assign (var, t);
>> +      name = make_ssa_name (var, stmt1);
>> +      gimple_assign_set_lhs (stmt1, name);
>> +      gimple_cond_set_code (use_stmt_cond, gimple_assign_rhs_code
>> (def_stmt_x));
>> +
>> +
>> +      if (is_lhs_of_comparison)
>> +       {
>> +         gimple_cond_set_lhs (use_stmt_cond, a);
>> +         gimple_cond_set_rhs (use_stmt_cond, name);
>> +       }
>> +      else
>> +       {
>> +         gimple_cond_set_lhs (use_stmt_cond, name);
>> +         gimple_cond_set_rhs (use_stmt_cond, a);
>> +       }
>> +      update_stmt (use_stmt_cond);
>> +      gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
>> +      return stmt1;
>> +    }
>> +
>> +  return stmt;
>> +}
>> +
>>  /* Check if the pattern at *BSI is a bittest of the form
>>    (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */
>>
>> @@ -1171,6 +1290,11 @@ determine_invariantness_stmt (struct
>> dom_walk_data *dw_data ATTRIBUTE_UNUSED,
>>              && TREE_CODE (op0) == SSA_NAME
>>              && has_single_use (op0))
>>            stmt = rewrite_bittest (&bsi);
>> +         else
>> +           if (pos == MOVE_POSSIBLE
>> +               && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR)
>> +             stmt = rewrite_min_test (&bsi);
>> +
>>        }
>>
>>       lim_data = init_lim_data (stmt);

Re: [RFC] Converting end of loop computations to MIN_EXPRs.

Reply via email to