On Wed, Nov 27, 2013 at 1:53 AM, Richard Biener <rguent...@suse.de> wrote:
> On Fri, 22 Nov 2013, Cong Hou wrote:
>
>> Hi
>>
>> Currently in GCC vectorization, some loop invariant may be detected
>> after aliasing checks, which can be hoisted outside of the loop. The
>> current method in GCC may break the information built during the
>> analysis phase, causing some crash (see PR59006 and PR58921).
>>
>> This patch improves the loop invariant hoisting by delaying it until
>> all statements are vectorized, thereby keeping all built information.
>> But those loop invariant statements won't be vectorized, and if a
>> variable is defined by one of those loop invariant, it is treated as
>> an external definition.
>>
>> Bootstrapped and testes on an x86-64 machine.
>
> Hmm.  I'm still thinking that we should handle this during the regular
> transform step.
>
> Like with the following incomplete patch.  Missing is adjusting
> the rest of the vectorizable_* functions to handle the case where all defs
> are dt_external or constant by setting their own STMT_VINFO_DEF_TYPE to
> dt_external.  From the gcc.dg/vect/pr58508.c we get only 4 hoists
> instead of 8 because of this (I think).
>
> Also gcc.dg/vect/pr52298.c ICEs for yet unanalyzed reason.
>
> I can take over the bug if you like.
>
> Thanks,
> Richard.
>
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> *** gcc/tree-vect-data-refs.c   (revision 205435)
> --- gcc/tree-vect-data-refs.c   (working copy)
> *************** again:
> *** 3668,3673 ****
> --- 3668,3682 ----
>             }
>           STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
>         }
> +       else if (loop_vinfo
> +              && integer_zerop (DR_STEP (dr)))
> +       {
> +         /* All loads from a non-varying address will be disambiguated
> +            by data-ref analysis or via a runtime alias check and thus
> +            they will become invariant.  Force them to be vectorized
> +            as external.  */
> +         STMT_VINFO_DEF_TYPE (stmt_info) = vect_external_def;
> +       }
>       }
>
>     /* If we stopped analysis at the first dataref we could not analyze


I agree that setting the statement that loads a data-ref with zero
step as vect_external_def early at this point is a good idea. This
avoids two loop analyses seeing inconsistent def-info if we do this
later. Note with this change the following loop in PR59006 will not be
vectorized:


int a[8], b;

void fn1(void) {
  int c;
  for (; b; b++) {
    int d = a[b];
    c = a[0] ? d : 0;
    a[b] = c;
  }
}

This is because the load to a[0] is now treated as an external def, in
which case vectype cannot be found for the condition of the
conditional expression, while vectorizable_condition requires that
comp_vectype should be set properly. We can treat it as a missed
optimization.



> Index: gcc/tree-vect-loop-manip.c
> ===================================================================
> *** gcc/tree-vect-loop-manip.c  (revision 205435)
> --- gcc/tree-vect-loop-manip.c  (working copy)
> *************** vect_loop_versioning (loop_vec_info loop
> *** 2269,2275 ****
>
>     /* Extract load statements on memrefs with zero-stride accesses.  */
>
> !   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
>       {
>         /* In the loop body, we iterate each statement to check if it is a 
> load.
>          Then we check the DR_STEP of the data reference.  If DR_STEP is zero,
> --- 2269,2275 ----
>
>     /* Extract load statements on memrefs with zero-stride accesses.  */
>
> !   if (0 && LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
>       {
>         /* In the loop body, we iterate each statement to check if it is a 
> load.
>          Then we check the DR_STEP of the data reference.  If DR_STEP is zero,
> Index: gcc/tree-vect-loop.c
> ===================================================================
> *** gcc/tree-vect-loop.c        (revision 205435)
> --- gcc/tree-vect-loop.c        (working copy)
> *************** vect_transform_loop (loop_vec_info loop_
> *** 5995,6000 ****
> --- 5995,6020 ----
>                 }
>             }
>
> +         /* If the stmt is loop invariant simply move it.  */
> +         if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_external_def)
> +           {
> +             if (dump_enabled_p ())
> +               {
> +                 dump_printf_loc (MSG_NOTE, vect_location,
> +                                  "hoisting out of the vectorized loop: ");
> +                 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
> +                 dump_printf (MSG_NOTE, "\n");
> +               }
> +             gsi_remove (&si, false);
> +             if (gimple_vuse (stmt))
> +               gimple_set_vuse (stmt, NULL);
> +             basic_block new_bb;
> +             new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge 
> (loop),
> +                                                    stmt);
> +             gcc_assert (!new_bb);
> +             continue;
> +           }
> +
>           /* -------- vectorize statement ------------ */
>           if (dump_enabled_p ())
>             dump_printf_loc (MSG_NOTE, vect_location, "transform 
> statement.\n");
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> *** gcc/tree-vect-stmts.c       (revision 205435)
> --- gcc/tree-vect-stmts.c       (working copy)
> *************** vectorizable_operation (gimple stmt, gim
> *** 3497,3502 ****
> --- 3497,3503 ----
>     vec<tree> vec_oprnds2 = vNULL;
>     tree vop0, vop1, vop2;
>     bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
> +   bool all_ops_external = true;
>     int vf;
>
>     if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
> *************** vectorizable_operation (gimple stmt, gim
> *** 3557,3562 ****
> --- 3558,3565 ----
>                            "use not simple.\n");
>         return false;
>       }
> +   if (dt[0] != vect_external_def && dt[0] != vect_constant_def)
> +     all_ops_external = false;
>     /* If op0 is an external or constant def use a vector type with
>        the same size as the output vector type.  */
>     if (!vectype)
> *************** vectorizable_operation (gimple stmt, gim
> *** 3593,3598 ****
> --- 3596,3603 ----
>                                "use not simple.\n");
>           return false;
>         }
> +       if (dt[1] != vect_external_def && dt[1] != vect_constant_def)
> +       all_ops_external = false;
>       }
>     if (op_type == ternary_op)
>       {
> *************** vectorizable_operation (gimple stmt, gim
> *** 3605,3610 ****
> --- 3610,3623 ----
>                                "use not simple.\n");
>           return false;
>         }
> +       if (dt[2] != vect_external_def && dt[2] != vect_constant_def)
> +       all_ops_external = false;
> +     }
> +
> +   if (all_ops_external && loop_vinfo)
> +     {
> +       STMT_VINFO_DEF_TYPE (stmt_info) = vect_external_def;
> +       return true;
>       }
>
>     if (loop_vinfo)
> *************** vect_analyze_stmt (gimple stmt, bool *ne
> *** 5771,5779 ****
>                        || relevance == vect_unused_in_scope));
>            break;
>
>         case vect_induction_def:
>         case vect_constant_def:
> -       case vect_external_def:
>         case vect_unknown_def_type:
>         default:
>           gcc_unreachable ();
> --- 5784,5795 ----
>                        || relevance == vect_unused_in_scope));
>            break;
>
> +       case vect_external_def:
> +       /* If we decided a stmt is invariant don't bother to vectorize it.  */
> +       return true;
> +
>         case vect_induction_def:
>         case vect_constant_def:
>         case vect_unknown_def_type:
>         default:
>           gcc_unreachable ();
>

In this manner all other loop invariants are detected in the
transformation phase. I am not sure if it is tedious to do this in
every vectorizable_* function. We could also do this just after
detecting those invariant loads during data-ref analysis. What do you
think?

Please also feel free to take this bug. I have a coming trip and could
not work on it in five days.

Thank you very much!


Cong


>
>>
>> thanks,
>> Cong
>>
>>
>>
>> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
>> index 2c0554b..0614bab 100644
>> --- a/gcc/ChangeLog
>> +++ b/gcc/ChangeLog
>> @@ -1,3 +1,18 @@
>> +2013-11-22  Cong Hou  <co...@google.com>
>> +
>> + PR tree-optimization/58921
>> + PR tree-optimization/59006
>> + * tree-vectorizer.h (struct _stmt_vec_info): New data member
>> + loop_invariant.
>> + * tree-vect-loop-manip.c (vect_loop_versioning): Delay hoisting loop
>> + invariants until all statements are vectorized.
>> + * tree-vect-loop.c (vect_hoist_loop_invariants): New functions.
>> + (vect_transform_loop): Hoist loop invariants after all statements
>> + are vectorized.  Do not vectorize loop invariants stmts.
>> + * tree-vect-stmts.c (vect_get_vec_def_for_operand): Treat a loop
>> + invariant as an external definition.
>> + (new_stmt_vec_info): Initialize new data member.
>> +
>>  2013-11-12  Jeff Law  <l...@redhat.com>
>>
>>   * tree-ssa-threadedge.c (thread_around_empty_blocks): New
>> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
>> index 09c7f20..447625b 100644
>> --- a/gcc/testsuite/ChangeLog
>> +++ b/gcc/testsuite/ChangeLog
>> @@ -1,3 +1,10 @@
>> +2013-11-22  Cong Hou  <co...@google.com>
>> +
>> + PR tree-optimization/58921
>> + PR tree-optimization/59006
>> + * gcc.dg/vect/pr58921.c: New test.
>> + * gcc.dg/vect/pr59006.c: New test.
>> +
>>  2013-11-12  Balaji V. Iyer  <balaji.v.i...@intel.com>
>>
>>   * gcc.dg/cilk-plus/cilk-plus.exp: Added a check for LTO before running
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr58921.c
>> b/gcc/testsuite/gcc.dg/vect/pr58921.c
>> new file mode 100644
>> index 0000000..ee3694a
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr58921.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target vect_int } */
>> +
>> +int a[7];
>> +int b;
>> +
>> +void
>> +fn1 ()
>> +{
>> +  for (; b; b++)
>> +    a[b] = ((a[b] <= 0) == (a[0] != 0));
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
>> +/* { dg-final { cleanup-tree-dump "vect" } } */
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr59006.c
>> b/gcc/testsuite/gcc.dg/vect/pr59006.c
>> new file mode 100644
>> index 0000000..95d90a9
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr59006.c
>> @@ -0,0 +1,24 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target vect_int } */
>> +
>> +int a[8], b;
>> +
>> +void fn1 (void)
>> +{
>> +  int c;
>> +  for (; b; b++)
>> +    {
>> +      int d = a[b];
>> +      c = a[0] ? d : 0;
>> +      a[b] = c;
>> +    }
>> +}
>> +
>> +void fn2 ()
>> +{
>> +  for (; b <= 0; b++)
>> +    a[b] = a[0] || b;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
>> +/* { dg-final { cleanup-tree-dump "vect" } } */
>> diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
>> index 15227856..3adc73d 100644
>> --- a/gcc/tree-vect-loop-manip.c
>> +++ b/gcc/tree-vect-loop-manip.c
>> @@ -2448,8 +2448,12 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
>>    FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
>>      {
>>        gimple def = SSA_NAME_DEF_STMT (var);
>> +      stmt_vec_info def_stmt_info;
>> +
>>        if (!gimple_nop_p (def)
>> -  && flow_bb_inside_loop_p (loop, gimple_bb (def)))
>> +  && flow_bb_inside_loop_p (loop, gimple_bb (def))
>> +  && !((def_stmt_info = vinfo_for_stmt (def))
>> + && STMT_VINFO_LOOP_INVARIANT_P (def_stmt_info)))
>>   {
>>    hoist = false;
>>    break;
>> @@ -2458,21 +2462,8 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
>>
>>    if (hoist)
>>      {
>> -      if (dr)
>> - gimple_set_vuse (stmt, NULL);
>> -
>> -      gsi_remove (&si, false);
>> -      gsi_insert_on_edge_immediate (loop_preheader_edge (loop),
>> -    stmt);
>> -
>> -      if (dump_enabled_p ())
>> - {
>> -  dump_printf_loc
>> -      (MSG_NOTE, vect_location,
>> -       "hoisting out of the vectorized loop: ");
>> -  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
>> -  dump_printf (MSG_NOTE, "\n");
>> - }
>> +      STMT_VINFO_LOOP_INVARIANT_P (stmt_info) = true;
>> +      gsi_next (&si);
>>        continue;
>>      }
>>   }
>> @@ -2481,6 +2472,7 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
>>   }
>>      }
>>
>> +
>>    /* End loop-exit-fixes after versioning.  */
>>
>>    if (cond_expr_stmt_list)
>> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
>> index 292e771..148f9f1 100644
>> --- a/gcc/tree-vect-loop.c
>> +++ b/gcc/tree-vect-loop.c
>> @@ -5572,6 +5572,49 @@ vect_loop_kill_debug_uses (struct loop *loop,
>> gimple stmt)
>>      }
>>  }
>>
>> +/* Find all loop invariants detected after alias checks, and hoist them
>> +   before the loop preheader.  */
>> +
>> +static void
>> +vect_hoist_loop_invariants (loop_vec_info loop_vinfo)
>> +{
>> +  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
>> +  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
>> +  gimple_seq loop_invariants = NULL;
>> +
>> +  for (int i = 0; i < (int)loop->num_nodes; i++)
>> +    {
>> +      basic_block bb = bbs[i];
>> +      for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);)
>> + {
>> +  gimple stmt = gsi_stmt (si);
>> +  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
>> +  if (stmt_vinfo && STMT_VINFO_LOOP_INVARIANT_P (stmt_vinfo))
>> +    {
>> +      if (gimple_has_mem_ops (stmt))
>> + gimple_set_vuse (stmt, NULL);
>> +
>> +      gsi_remove (&si, false);
>> +      gimple_seq_add_stmt (&loop_invariants, stmt);
>> +
>> +      if (dump_enabled_p ())
>> + {
>> +  dump_printf_loc
>> +    (MSG_NOTE, vect_location,
>> +     "hoisting out of the vectorized loop: ");
>> +  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
>> +  dump_printf (MSG_NOTE, "\n");
>> + }
>> +    }
>> +  else
>> +    gsi_next (&si);
>> + }
>> +    }
>> +  basic_block pre_header = loop_preheader_edge (loop)->src;
>> +  gcc_assert (EDGE_COUNT (pre_header->preds) == 1);
>> +  gsi_insert_seq_on_edge_immediate (EDGE_PRED (pre_header, 0),
>> loop_invariants);
>> +}
>> +
>>  /* Function vect_transform_loop.
>>
>>     The analysis phase has determined that the loop is vectorizable.
>> @@ -5835,6 +5878,15 @@ vect_transform_loop (loop_vec_info loop_vinfo)
>>   transform_pattern_stmt = false;
>>              }
>>
>> +          /* If stmt is a loop invariant (detected after alias checks),
>> +             do not generate the vectorized stmt for it as it will be
>> +             hoisted later.  */
>> +  if (STMT_VINFO_LOOP_INVARIANT_P (stmt_info))
>> +    {
>> +      gsi_next (&si);
>> +      continue;
>> +    }
>> +
>>    gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
>>    nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
>>                                                 STMT_VINFO_VECTYPE 
>> (stmt_info));
>> @@ -5910,6 +5962,9 @@ vect_transform_loop (loop_vec_info loop_vinfo)
>>   }        /* stmts in BB */
>>      } /* BBs in loop */
>>
>> +  /* Hoist all loop invariants.  */
>> +  vect_hoist_loop_invariants (loop_vinfo);
>> +
>>    slpeel_make_loop_iterate_ntimes (loop, ratio);
>>
>>    /* Reduce loop iterations by the vectorization factor.  */
>> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
>> index b0e0fa9..3e15372 100644
>> --- a/gcc/tree-vect-stmts.c
>> +++ b/gcc/tree-vect-stmts.c
>> @@ -1362,6 +1362,18 @@ vect_get_vec_def_for_operand (tree op, gimple
>> stmt, tree *scalar_def)
>>          }
>>      }
>>
>> +  /* After alias checks, some loop invariants may be detected, and we won't
>> +     generate vectorized stmts for them.  We only hoist them after all stmts
>> +     are vectorized.  Here if we meet a loop invariant, we need to assume it
>> +     is already hoisted before the loop.  We do this by setting the def-type
>> +     to vect_external_def.  */
>> +  if (def_stmt && dt == vect_internal_def)
>> +    {
>> +      stmt_vec_info stmt_vinfo = vinfo_for_stmt (def_stmt);
>> +      if (stmt_vinfo && STMT_VINFO_LOOP_INVARIANT_P (stmt_vinfo))
>> + dt = vect_external_def;
>> +    }
>> +
>>    switch (dt)
>>      {
>>      /* Case 1: operand is a constant.  */
>> @@ -6083,6 +6095,7 @@ new_stmt_vec_info (gimple stmt, loop_vec_info 
>> loop_vinfo,
>>    STMT_VINFO_BB_VINFO (res) = bb_vinfo;
>>    STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
>>    STMT_VINFO_LIVE_P (res) = false;
>> +  STMT_VINFO_LOOP_INVARIANT_P (res) = false;
>>    STMT_VINFO_VECTYPE (res) = NULL;
>>    STMT_VINFO_VEC_STMT (res) = NULL;
>>    STMT_VINFO_VECTORIZABLE (res) = true;
>> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
>> index bbd50e1..2c230f9 100644
>> --- a/gcc/tree-vectorizer.h
>> +++ b/gcc/tree-vectorizer.h
>> @@ -516,6 +516,10 @@ typedef struct _stmt_vec_info {
>>       used outside the loop.  */
>>    bool live;
>>
>> +  /* Indicates whether this stmt is a loop invariant, which can be hoisted.
>> +     A stmt may become loop invariant after alias checks.  */
>> +  bool loop_invariant;
>> +
>>    /* Stmt is part of some pattern (computation idiom)  */
>>    bool in_pattern_p;
>>
>> @@ -623,6 +627,7 @@ typedef struct _stmt_vec_info {
>>  #define STMT_VINFO_BB_VINFO(S)             (S)->bb_vinfo
>>  #define STMT_VINFO_RELEVANT(S)             (S)->relevant
>>  #define STMT_VINFO_LIVE_P(S)               (S)->live
>> +#define STMT_VINFO_LOOP_INVARIANT_P(S)     (S)->loop_invariant
>>  #define STMT_VINFO_VECTYPE(S)              (S)->vectype
>>  #define STMT_VINFO_VEC_STMT(S)             (S)->vectorized_stmt
>>  #define STMT_VINFO_VECTORIZABLE(S)         (S)->vectorizable
>>
>
> --
> Richard Biener <rguent...@suse.de>
> SUSE / SUSE Labs
> SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
> GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer

Reply via email to