Hi Richi,

> -----Original Message-----
> From: rguent...@ryzen.fritz.box <rguent...@ryzen.fritz.box> On Behalf Of
> Richard Biener
> Sent: Friday, November 20, 2020 9:54 AM
> To: gcc-patches@gcc.gnu.org
> Cc: Tamar Christina <tamar.christ...@arm.com>
> Subject: [PATCH] Deal with (pattern) SLP consumed stmts in hybrid discovery
> 
> This makes hybrid SLP discovery deal with stmts indirectly consumed by SLP,
> for example via patterns.  This means that all uses of a stmt end up in SLP
> vectorized stmts.
> 
> This helps my prototype patches for PR97832 where I make SLP discovery re-
> associate chains to make operands match.  This ends up building SLP
> computation nodes without 1:1 representatives in the scalar IL and thus no
> scalar lane defs in SLP_TREE_SCALAR_STMTS.  Nevertheless all of the original
> scalar stmts are consumed so this represents another kind of SLP pattern for
> the computation chain result.
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
> 
> Tamar - can you check if this helps you avoiding all the relevancy push/pop
> stuff as well as avoiding any pattern marking for SLP patterns at all?
> 

Looks like it does allow me to avoid the relevancy and SLP_TYPE markings:

=== vect_detect_hybrid_slp ===
Processing hybrid candidate : slp_patt_48 = .COMPLEX_MUL (_pa_50, _pa_49);
Marked SLP consumed stmt pure: slp_patt_48 = .COMPLEX_MUL (_pa_50, _pa_49);
Processing hybrid candidate : slp_patt_51 = .COMPLEX_MUL (_pa_53, _pa_52);
Marked SLP consumed stmt pure: slp_patt_51 = .COMPLEX_MUL (_pa_53, _pa_52);
Processing hybrid candidate : _24 = _9 * _19;
Marked SLP consumed stmt pure: _24 = _9 * _19;
Processing hybrid candidate : _23 = _10 * _18;
Marked SLP consumed stmt pure: _23 = _10 * _18;
Processing hybrid candidate : _22 = _9 * _18;
Marked SLP consumed stmt pure: _22 = _9 * _18;
Processing hybrid candidate : _17 = _10 * _19;
Marked SLP consumed stmt pure: _17 = _10 * _19;

And costing looks to be ignoring the unrelated statements

0x50bd800 REALPART_EXPR <*_3> 1 times unaligned_load (misalign -1) costs 1 in 
body
0x50bd800 REALPART_EXPR <*_5> 1 times unaligned_load (misalign -1) costs 1 in 
body
0x50bd800 .COMPLEX_MUL (_pa_53, _pa_52) 1 times vector_stmt costs 1 in body
0x50bd800 REALPART_EXPR <*_5> 1 times unaligned_load (misalign -1) costs 1 in 
body
0x50bd800 <unknown> 0 times vec_perm costs 0 in body
0x50bd800 .COMPLEX_MUL (_pa_50, _pa_49) 1 times vector_stmt costs 1 in body
0x50bd800 <unknown> 1 times vec_perm costs 2 in body
0x50bd800 _25 1 times unaligned_store (misalign -1) costs 1 in body

But it doesn't allow me to avoid the pattern markings.

Without the pattern markings it will crash when analyzing the loads

note:   ==> examining statement: _26 = REALPART_EXPR <*_5>;
note:   Vectorizing an unaligned access.
note:   vect_model_load_cost: unaligned supported by hardware.
note:   vect_model_load_cost: inside_cost = 1, prologue_cost = 0 .

as the statement it finds is not being used:

cadd.c:22:6: internal compiler error: in vect_slp_analyze_node_operations_1, at 
tree-vect-slp.c:3591
   22 | void f90 (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE 
complex c[restrict N])
      |      ^~~
0x17200e1 vect_slp_analyze_node_operations_1
        /gnu-work/src/gcc/gcc/tree-vect-slp.c:3591
0x17209e8 vect_slp_analyze_node_operations
        /gnu-work/src/gcc/gcc/tree-vect-slp.c:3784
0x1720995 vect_slp_analyze_node_operations
        /gnu-work/src/gcc/gcc/tree-vect-slp.c:3776
0x1720995 vect_slp_analyze_node_operations
        /gnu-work/src/gcc/gcc/tree-vect-slp.c:3776
0x1720995 vect_slp_analyze_node_operations
        /gnu-work/src/gcc/gcc/tree-vect-slp.c:3776
0x1720995 vect_slp_analyze_node_operations
        /gnu-work/src/gcc/gcc/tree-vect-slp.c:3776
0x17212f2 vect_slp_analyze_operations(vec_info*)
        /gnu-work/src/gcc/gcc/tree-vect-slp.c:3971
0x16e2c6e vect_analyze_loop_2
        /gnu-work/src/gcc/gcc/tree-vect-loop.c:2419
0x16e4bcd vect_analyze_loop(loop*, vec_info_shared*)
        /gnu-work/src/gcc/gcc/tree-vect-loop.c:2956
0x173ba4d try_vectorize_loop_1
        /gnu-work/src/gcc/gcc/tree-vectorizer.c:1010
0x173c23a try_vectorize_loop
        /gnu-work/src/gcc/gcc/tree-vectorizer.c:1163
0x173c470 vectorize_loops()
        /gnu-work/src/gcc/gcc/tree-vectorizer.c:1244
0x1579275 execute
        /gnu-work/src/gcc/gcc/tree-ssa-loop.c:414

I will assume the patch's existence in tree when I send out my COMPLEX_ADD 
patch today then.

Thanks!,
Tamar

> 2020-11-20  Richard Biener  <rguent...@suse.de>
> 
>       * tree-vect-slp.c (maybe_push_to_hybrid_worklist): New function.
>       (vect_detect_hybrid_slp): Use it.  Perform a backward walk
>       over the IL.
> ---
>  gcc/tree-vect-slp.c | 79
> +++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 72 insertions(+), 7 deletions(-)
> 
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index
> 486ee95d5d2..f87ac3c049f 100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -3439,6 +3439,63 @@ vect_detect_hybrid_slp (tree *tp, int *, void
> *data)
>    return NULL_TREE;
>  }
> 
> +/* Look if STMT_INFO is consumed by SLP indirectly and mark it pure_slp
> +   if so, otherwise pushing it to WORKLIST.  */
> +
> +static void
> +maybe_push_to_hybrid_worklist (vec_info *vinfo,
> +                            vec<stmt_vec_info> &worklist,
> +                            stmt_vec_info stmt_info)
> +{
> +  if (dump_enabled_p ())
> +    dump_printf_loc (MSG_NOTE, vect_location,
> +                  "Processing hybrid candidate : %G", stmt_info->stmt);
> +  stmt_vec_info orig_info = vect_orig_stmt (stmt_info);
> +  imm_use_iterator iter2;
> +  ssa_op_iter iter1;
> +  use_operand_p use_p;
> +  def_operand_p def_p;
> +  bool any_def = false;
> +  FOR_EACH_PHI_OR_STMT_DEF (def_p, orig_info->stmt, iter1,
> SSA_OP_DEF)
> +    {
> +      any_def = true;
> +      FOR_EACH_IMM_USE_FAST (use_p, iter2, DEF_FROM_PTR (def_p))
> +     {
> +       stmt_vec_info use_info = vinfo->lookup_stmt (USE_STMT (use_p));
> +       /* An out-of loop use means this is a loop_vect sink.  */
> +       if (!use_info)
> +         {
> +           if (dump_enabled_p ())
> +             dump_printf_loc (MSG_NOTE, vect_location,
> +                              "Found loop_vect sink: %G", stmt_info-
> >stmt);
> +           worklist.safe_push (stmt_info);
> +           return;
> +         }
> +       else if (!STMT_SLP_TYPE (vect_stmt_to_vectorize (use_info)))
> +         {
> +           if (dump_enabled_p ())
> +             dump_printf_loc (MSG_NOTE, vect_location,
> +                              "Found loop_vect use: %G", use_info->stmt);
> +           worklist.safe_push (stmt_info);
> +           return;
> +         }
> +     }
> +    }
> +  /* No def means this is a loo_vect sink.  */
> +  if (!any_def)
> +    {
> +      if (dump_enabled_p ())
> +     dump_printf_loc (MSG_NOTE, vect_location,
> +                      "Found loop_vect sink: %G", stmt_info->stmt);
> +      worklist.safe_push (stmt_info);
> +      return;
> +    }
> +  if (dump_enabled_p ())
> +    dump_printf_loc (MSG_NOTE, vect_location,
> +                  "Marked SLP consumed stmt pure: %G", stmt_info->stmt);
> +  STMT_SLP_TYPE (stmt_info) = pure_slp; }
> +
>  /* Find stmts that must be both vectorized and SLPed.  */
> 
>  void
> @@ -3448,9 +3505,14 @@ vect_detect_hybrid_slp (loop_vec_info
> loop_vinfo)
> 
>    /* All stmts participating in SLP are marked pure_slp, all other
>       stmts are loop_vect.
> -     First collect all loop_vect stmts into a worklist.  */
> +     First collect all loop_vect stmts into a worklist.
> +     SLP patterns cause not all original scalar stmts to appear in
> +     SLP_TREE_SCALAR_STMTS and thus not all of them are marked pure_slp.
> +     Rectify this here and do a backward walk over the IL only considering
> +     stmts as loop_vect when they are used by a loop_vect stmt and
> otherwise
> +     mark them as pure_slp.  */
>    auto_vec<stmt_vec_info> worklist;
> -  for (unsigned i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
> +  for (int i = LOOP_VINFO_LOOP (loop_vinfo)->num_nodes - 1; i >= 0;
> + --i)
>      {
>        basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
>        for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); @@ 
> -3459,10
> +3521,11 @@ vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
>         gphi *phi = gsi.phi ();
>         stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (phi);
>         if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT
> (stmt_info))
> -         worklist.safe_push (stmt_info);
> +         maybe_push_to_hybrid_worklist (loop_vinfo,
> +                                        worklist, stmt_info);
>       }
> -      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
> -        gsi_next (&gsi))
> +      for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
> +        gsi_prev (&gsi))
>       {
>         gimple *stmt = gsi_stmt (gsi);
>         if (is_gimple_debug (stmt))
> @@ -3478,12 +3541,14 @@ vect_detect_hybrid_slp (loop_vec_info
> loop_vinfo)
>                   = loop_vinfo->lookup_stmt (gsi_stmt (gsi2));
>                 if (!STMT_SLP_TYPE (patt_info)
>                     && STMT_VINFO_RELEVANT (patt_info))
> -                 worklist.safe_push (patt_info);
> +                 maybe_push_to_hybrid_worklist (loop_vinfo,
> +                                                worklist, patt_info);
>               }
>             stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
>           }
>         if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT
> (stmt_info))
> -         worklist.safe_push (stmt_info);
> +         maybe_push_to_hybrid_worklist (loop_vinfo,
> +                                        worklist, stmt_info);
>       }
>      }
> 
> --
> 2.26.2

Reply via email to