On Tue, Oct 1, 2024 at 5:04 AM Richard Biener <rguent...@suse.de> wrote:
>
> The following adds SLP support for vectorizing single-lane inductions
> with variable length vectors.

This introduces a bootstrap failure on aarch64 due to a maybe
uninitialized variable.

    inlined from ‘bool vectorizable_induction(loop_vec_info,
stmt_vec_info, gimple**, slp_tree, stmt_vector_for_cost*)’ at
/home/linaro/src/upstream-gcc/gcc/gcc/tree-vect-loop.cc:10718:33:
/home/linaro/src/upstream-gcc/gcc/gcc/gimple-fold.h:183:25: error:
‘vec_init’ may be used uninitialized [-Werror=maybe-uninitialized]
  183 |   return gimple_convert (&gsi, false, GSI_CONTINUE_LINKING,
      |          ~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  184 |                          UNKNOWN_LOCATION, type, op);
      |                          ~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/linaro/src/upstream-gcc/gcc/gcc/tree-vect-loop.cc: In function
‘bool vectorizable_induction(loop_vec_info, stmt_vec_info, gimple**,
slp_tree, stmt_vector_for_cost*)’:
/home/linaro/src/upstream-gcc/gcc/gcc/tree-vect-loop.cc:10281:17:
note: ‘vec_init’ was declared here
10281 |   tree new_vec, vec_init, vec_step, t;
      |                 ^~~~~~~~


The issue is around line 10718:
          if (init_node)
            vec_init = vect_get_slp_vect_def (init_node, ivn);
          if (!nested_in_vect_loop
              && step_mul
              && !integer_zerop (step_mul))
            {
              gcc_assert (invariant);
              vec_def = gimple_convert (&init_stmts, step_vectype, vec_init);

it is hard to follow the code to see if it is actually uninitialized or not.

Thanks,
Andrew

>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
>         PR tree-optimization/116566
>         * tree-vect-loop.cc (vectorizable_induction): Handle single-lane
>         SLP for VLA vectors.
> ---
>  gcc/tree-vect-loop.cc | 247 ++++++++++++++++++++++++++++++++----------
>  1 file changed, 189 insertions(+), 58 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index a5a44613cb2..f5ecf0bdb80 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -10283,7 +10283,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
>    gimple *new_stmt;
>    gphi *induction_phi;
>    tree induc_def, vec_dest;
> -  tree init_expr, step_expr;
>    poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
>    unsigned i;
>    tree expr;
> @@ -10369,7 +10368,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
>      iv_loop = loop;
>    gcc_assert (iv_loop == (gimple_bb (phi))->loop_father);
>
> -  if (slp_node && !nunits.is_constant ())
> +  if (slp_node && (!nunits.is_constant () && SLP_TREE_LANES (slp_node) != 1))
>      {
>        /* The current SLP code creates the step value element-by-element.  */
>        if (dump_enabled_p ())
> @@ -10387,7 +10386,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
>        return false;
>      }
>
> -  step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
> +  tree step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
>    gcc_assert (step_expr != NULL_TREE);
>    if (INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
>        && !type_has_mode_precision_p (TREE_TYPE (step_expr)))
> @@ -10475,9 +10474,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
>         [i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2].  */
>    if (slp_node)
>      {
> -      /* Enforced above.  */
> -      unsigned int const_nunits = nunits.to_constant ();
> -
>        /* The initial values are vectorized, but any lanes > group_size
>          need adjustment.  */
>        slp_tree init_node
> @@ -10499,11 +10495,12 @@ vectorizable_induction (loop_vec_info loop_vinfo,
>
>        /* Now generate the IVs.  */
>        unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
> -      gcc_assert ((const_nunits * nvects) % group_size == 0);
> +      gcc_assert (multiple_p (nunits * nvects, group_size));
>        unsigned nivs;
> +      unsigned HOST_WIDE_INT const_nunits;
>        if (nested_in_vect_loop)
>         nivs = nvects;
> -      else
> +      else if (nunits.is_constant (&const_nunits))
>         {
>           /* Compute the number of distinct IVs we need.  First reduce
>              group_size if it is a multiple of const_nunits so we get
> @@ -10514,21 +10511,43 @@ vectorizable_induction (loop_vec_info loop_vinfo,
>           nivs = least_common_multiple (group_sizep,
>                                         const_nunits) / const_nunits;
>         }
> +      else
> +       {
> +         gcc_assert (SLP_TREE_LANES (slp_node) == 1);
> +         nivs = 1;
> +       }
> +      gimple_seq init_stmts = NULL;
>        tree stept = TREE_TYPE (step_vectype);
>        tree lupdate_mul = NULL_TREE;
>        if (!nested_in_vect_loop)
>         {
> -         /* The number of iterations covered in one vector iteration.  */
> -         unsigned lup_mul = (nvects * const_nunits) / group_size;
> -         lupdate_mul
> -           = build_vector_from_val (step_vectype,
> -                                    SCALAR_FLOAT_TYPE_P (stept)
> -                                    ? build_real_from_wide (stept, lup_mul,
> -                                                            UNSIGNED)
> -                                    : build_int_cstu (stept, lup_mul));
> +         if (nunits.is_constant (&const_nunits))
> +           {
> +             /* The number of iterations covered in one vector iteration.  */
> +             unsigned lup_mul = (nvects * const_nunits) / group_size;
> +             lupdate_mul
> +               = build_vector_from_val (step_vectype,
> +                                        SCALAR_FLOAT_TYPE_P (stept)
> +                                        ? build_real_from_wide (stept, 
> lup_mul,
> +                                                                UNSIGNED)
> +                                        : build_int_cstu (stept, lup_mul));
> +           }
> +         else
> +           {
> +             if (SCALAR_FLOAT_TYPE_P (stept))
> +               {
> +                 tree tem = build_int_cst (integer_type_node, vf);
> +                 lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR,
> +                                             stept, tem);
> +               }
> +             else
> +               lupdate_mul = build_int_cst (stept, vf);
> +             lupdate_mul = gimple_build_vector_from_val (&init_stmts,
> +                                                         step_vectype,
> +                                                         lupdate_mul);
> +           }
>         }
>        tree peel_mul = NULL_TREE;
> -      gimple_seq init_stmts = NULL;
>        if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo))
>         {
>           if (SCALAR_FLOAT_TYPE_P (stept))
> @@ -10540,44 +10559,105 @@ vectorizable_induction (loop_vec_info loop_vinfo,
>           peel_mul = gimple_build_vector_from_val (&init_stmts,
>                                                    step_vectype, peel_mul);
>         }
> +      tree step_mul = NULL_TREE;
>        unsigned ivn;
>        auto_vec<tree> vec_steps;
>        for (ivn = 0; ivn < nivs; ++ivn)
>         {
> -         tree_vector_builder step_elts (step_vectype, const_nunits, 1);
> -         tree_vector_builder init_elts (vectype, const_nunits, 1);
> -         tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
> -         for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
> +         gimple_seq stmts = NULL;
> +         bool invariant = true;
> +         if (nunits.is_constant (&const_nunits))
>             {
> -             /* The scalar steps of the IVs.  */
> -             tree elt = steps[(ivn*const_nunits + eltn) % group_size];
> -             elt = gimple_convert (&init_stmts, TREE_TYPE (step_vectype), 
> elt);
> -             step_elts.quick_push (elt);
> +             tree_vector_builder step_elts (step_vectype, const_nunits, 1);
> +             tree_vector_builder init_elts (vectype, const_nunits, 1);
> +             tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
> +             for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
> +               {
> +                 /* The scalar steps of the IVs.  */
> +                 tree elt = steps[(ivn*const_nunits + eltn) % group_size];
> +                 elt = gimple_convert (&init_stmts,
> +                                       TREE_TYPE (step_vectype), elt);
> +                 step_elts.quick_push (elt);
> +                 if (!init_node)
> +                   {
> +                     /* The scalar inits of the IVs if not vectorized.  */
> +                     elt = inits[(ivn*const_nunits + eltn) % group_size];
> +                     if (!useless_type_conversion_p (TREE_TYPE (vectype),
> +                                                     TREE_TYPE (elt)))
> +                       elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
> +                                           TREE_TYPE (vectype), elt);
> +                     init_elts.quick_push (elt);
> +                   }
> +                 /* The number of steps to add to the initial values.  */
> +                 unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
> +                 mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
> +                                      ? build_real_from_wide (stept, mul_elt,
> +                                                              UNSIGNED)
> +                                      : build_int_cstu (stept, mul_elt));
> +               }
> +             vec_step = gimple_build_vector (&init_stmts, &step_elts);
> +             step_mul = gimple_build_vector (&init_stmts, &mul_elts);
>               if (!init_node)
> +               vec_init = gimple_build_vector (&init_stmts, &init_elts);
> +           }
> +         else
> +           {
> +             if (init_node)
> +               ;
> +             else if (INTEGRAL_TYPE_P (TREE_TYPE (steps[0])))
> +               {
> +                 new_name = gimple_convert (&init_stmts, stept, inits[0]);
> +                 /* Build the initial value directly as a VEC_SERIES_EXPR.  
> */
> +                 vec_init = gimple_build (&init_stmts, VEC_SERIES_EXPR,
> +                                          step_vectype, new_name, steps[0]);
> +                 if (!useless_type_conversion_p (vectype, step_vectype))
> +                   vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
> +                                            vectype, vec_init);
> +               }
> +             else
>                 {
> -                 /* The scalar inits of the IVs if not vectorized.  */
> -                 elt = inits[(ivn*const_nunits + eltn) % group_size];
> -                 if (!useless_type_conversion_p (TREE_TYPE (vectype),
> -                                                 TREE_TYPE (elt)))
> -                   elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
> -                                       TREE_TYPE (vectype), elt);
> -                 init_elts.quick_push (elt);
> +                 /* Build:
> +                      [base, base, base, ...]
> +                      + (vectype) [0, 1, 2, ...] * [step, step, step, ...].  
> */
> +                 gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (steps[0])));
> +                 gcc_assert (flag_associative_math);
> +                 tree index = build_index_vector (step_vectype, 0, 1);
> +                 new_name = gimple_convert (&init_stmts, TREE_TYPE 
> (steps[0]),
> +                                            inits[0]);
> +                 tree base_vec = gimple_build_vector_from_val (&init_stmts,
> +                                                               step_vectype,
> +                                                               new_name);
> +                 tree step_vec = gimple_build_vector_from_val (&init_stmts,
> +                                                               step_vectype,
> +                                                               steps[0]);
> +                 vec_init = gimple_build (&init_stmts, FLOAT_EXPR,
> +                                          step_vectype, index);
> +                 vec_init = gimple_build (&init_stmts, MULT_EXPR,
> +                                          step_vectype, vec_init, step_vec);
> +                 vec_init = gimple_build (&init_stmts, PLUS_EXPR,
> +                                          step_vectype, vec_init, base_vec);
> +                 if (!useless_type_conversion_p (vectype, step_vectype))
> +                   vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
> +                                            vectype, vec_init);
>                 }
> -             /* The number of steps to add to the initial values.  */
> -             unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
> -             mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
> -                                  ? build_real_from_wide (stept,
> -                                                          mul_elt, UNSIGNED)
> -                                  : build_int_cstu (stept, mul_elt));
> +             /* iv_loop is nested in the loop to be vectorized. Generate:
> +                vec_step = [S, S, S, S]  */
> +             t = unshare_expr (steps[0]);
> +             gcc_assert (CONSTANT_CLASS_P (t)
> +                         || TREE_CODE (t) == SSA_NAME);
> +             vec_step = gimple_build_vector_from_val (&init_stmts,
> +                                                      step_vectype, t);
>             }
> -         vec_step = gimple_build_vector (&init_stmts, &step_elts);
>           vec_steps.safe_push (vec_step);
> -         tree step_mul = gimple_build_vector (&init_stmts, &mul_elts);
>           if (peel_mul)
> -           step_mul = gimple_build (&init_stmts, MINUS_EXPR, step_vectype,
> -                                    step_mul, peel_mul);
> -         if (!init_node)
> -           vec_init = gimple_build_vector (&init_stmts, &init_elts);
> +           {
> +             if (!step_mul)
> +               step_mul = peel_mul;
> +             else
> +               step_mul = gimple_build (&init_stmts,
> +                                        MINUS_EXPR, step_vectype,
> +                                        step_mul, peel_mul);
> +           }
>
>           /* Create the induction-phi that defines the induction-operand.  */
>           vec_dest = vect_get_new_vect_var (vectype, vect_simple_var,
> @@ -10588,9 +10668,38 @@ vectorizable_induction (loop_vec_info loop_vinfo,
>           /* Create the iv update inside the loop  */
>           tree up = vec_step;
>           if (lupdate_mul)
> -           up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
> -                              vec_step, lupdate_mul);
> -         gimple_seq stmts = NULL;
> +           {
> +             if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> +               {
> +                 /* When we're using loop_len produced by SELEC_VL, the
> +                    non-final iterations are not always processing VF
> +                    elements.  So vectorize induction variable instead of
> +
> +                      _21 = vect_vec_iv_.6_22 + { VF, ... };
> +
> +                    We should generate:
> +
> +                      _35 = .SELECT_VL (ivtmp_33, VF);
> +                      vect_cst__22 = [vec_duplicate_expr] _35;
> +                      _21 = vect_vec_iv_.6_22 + vect_cst__22;  */
> +                 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> +                 tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
> +                                               vectype, 0, 0);
> +                 if (SCALAR_FLOAT_TYPE_P (stept))
> +                   expr = gimple_build (&stmts, FLOAT_EXPR, stept, len);
> +                 else
> +                   expr = gimple_convert (&stmts, stept, len);
> +                 lupdate_mul = gimple_build_vector_from_val (&stmts,
> +                                                             step_vectype,
> +                                                             expr);
> +                 up = gimple_build (&stmts, MULT_EXPR,
> +                                    step_vectype, vec_step, lupdate_mul);
> +               }
> +             else
> +               up = gimple_build (&init_stmts,
> +                                  MULT_EXPR, step_vectype,
> +                                  vec_step, lupdate_mul);
> +           }
>           vec_def = gimple_convert (&stmts, step_vectype, induc_def);
>           vec_def = gimple_build (&stmts,
>                                   PLUS_EXPR, step_vectype, vec_def, up);
> @@ -10602,8 +10711,10 @@ vectorizable_induction (loop_vec_info loop_vinfo,
>           if (init_node)
>             vec_init = vect_get_slp_vect_def (init_node, ivn);
>           if (!nested_in_vect_loop
> +             && step_mul
>               && !integer_zerop (step_mul))
>             {
> +             gcc_assert (invariant);
>               vec_def = gimple_convert (&init_stmts, step_vectype, vec_init);
>               up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
>                                  vec_step, step_mul);
> @@ -10620,8 +10731,11 @@ vectorizable_induction (loop_vec_info loop_vinfo,
>        if (!nested_in_vect_loop)
>         {
>           /* Fill up to the number of vectors we need for the whole group.  */
> -         nivs = least_common_multiple (group_size,
> -                                       const_nunits) / const_nunits;
> +         if (nunits.is_constant (&const_nunits))
> +           nivs = least_common_multiple (group_size,
> +                                         const_nunits) / const_nunits;
> +         else
> +           nivs = 1;
>           vec_steps.reserve (nivs-ivn);
>           for (; ivn < nivs; ++ivn)
>             {
> @@ -10634,14 +10748,31 @@ vectorizable_induction (loop_vec_info loop_vinfo,
>          stmts by adding VF' * stride to the IVs generated above.  */
>        if (ivn < nvects)
>         {
> -         unsigned vfp
> -           = least_common_multiple (group_size, const_nunits) / group_size;
> -         tree lupdate_mul
> -           = build_vector_from_val (step_vectype,
> -                                    SCALAR_FLOAT_TYPE_P (stept)
> -                                    ? build_real_from_wide (stept,
> -                                                            vfp, UNSIGNED)
> -                                    : build_int_cstu (stept, vfp));
> +         if (nunits.is_constant (&const_nunits))
> +           {
> +             unsigned vfp = (least_common_multiple (group_size, const_nunits)
> +                             / group_size);
> +             lupdate_mul
> +               = build_vector_from_val (step_vectype,
> +                                        SCALAR_FLOAT_TYPE_P (stept)
> +                                        ? build_real_from_wide (stept,
> +                                                                vfp, 
> UNSIGNED)
> +                                        : build_int_cstu (stept, vfp));
> +           }
> +         else
> +           {
> +             if (SCALAR_FLOAT_TYPE_P (stept))
> +               {
> +                 tree tem = build_int_cst (integer_type_node, nunits);
> +                 lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR,
> +                                             stept, tem);
> +               }
> +             else
> +               lupdate_mul = build_int_cst (stept, nunits);
> +             lupdate_mul = gimple_build_vector_from_val (&init_stmts,
> +                                                         step_vectype,
> +                                                         lupdate_mul);
> +           }
>           for (; ivn < nvects; ++ivn)
>             {
>               gimple *iv
> @@ -10673,7 +10804,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
>        return true;
>      }
>
> -  init_expr = vect_phi_initial_value (phi);
> +  tree init_expr = vect_phi_initial_value (phi);
>
>    gimple_seq stmts = NULL;
>    if (!nested_in_vect_loop)
> --
> 2.43.0

Reply via email to