On Thu, Sep 14, 2017 at 1:20 PM, Richard Sandiford
<richard.sandif...@linaro.org> wrote:
> This patch makes can_vec_perm_p & co. take a vec<>, wrapped in new
> typedefs vec_perm_indices and auto_vec_perm_indices.  There are two
> reasons for doing this for SVE:
>
> (1) it means that the number of elements is bundled with the elements
>     themselves, and is obviously constant.
>
> (2) it makes it easier to change the "unsigned char" element type to
>     something wider.
>
> I'm happy to change the target hooks as a follow-on patch, if this is OK.
>
> Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
> OK to install?

Ok.

Richard.

> Richard
>
>
> 2017-09-14  Richard Sandiford  <richard.sandif...@linaro.org>
>             Alan Hayward  <alan.hayw...@arm.com>
>             David Sherwood  <david.sherw...@arm.com>
>
> gcc/
>         * target.h (vec_perm_indices): New typedef.
>         (auto_vec_perm_indices): Likewise.
>         * optabs-query.h: Include target.h.
>         (can_vec_perm_p): Take a vec_perm_indices *.
>         * optabs-query.c (can_vec_perm_p): Likewise.
>         (can_mult_highpart_p): Update accordingly.  Use auto_vec_perm_indices.
>         * tree-ssa-forwprop.c (simplify_vector_constructor): Likewise.
>         * tree-vect-generic.c (lower_vec_perm): Likewise.
>         * tree-vect-data-refs.c (vect_grouped_store_supported): Likewise.
>         (vect_grouped_load_supported): Likewise.
>         (vect_shift_permute_load_chain): Likewise.
>         (vect_permute_store_chain): Use auto_vec_perm_indices.
>         (vect_permute_load_chain): Likewise.
>         * fold-const.c (fold_vec_perm): Take vec_perm_indices.
>         (fold_ternary_loc): Update accordingly.  Use auto_vec_perm_indices.
>         Update uses of can_vec_perm_p.
>         * tree-vect-loop.c (calc_vec_perm_mask_for_shift): Replace the
>         mode with a number of elements.  Take a vec_perm_indices *.
>         (vect_create_epilog_for_reduction): Update accordingly.
>         Use auto_vec_perm_indices.
>         (have_whole_vector_shift): Likewise.  Update call to can_vec_perm_p.
>         * tree-vect-slp.c (vect_build_slp_tree_1): Likewise.
>         (vect_transform_slp_perm_load): Likewise.
>         (vect_schedule_slp_instance): Use auto_vec_perm_indices.
>         * tree-vectorizer.h (vect_gen_perm_mask_any): Take a vec_perm_indices.
>         (vect_gen_perm_mask_checked): Likewise.
>         * tree-vect-stmts.c (vect_gen_perm_mask_any): Take a vec_perm_indices.
>         (vect_gen_perm_mask_checked): Likewise.
>         (vectorizable_mask_load_store): Use auto_vec_perm_indices.
>         (vectorizable_store): Likewise.
>         (vectorizable_load): Likewise.
>         (perm_mask_for_reverse): Likewise.  Update call to can_vec_perm_p.
>         (vectorizable_bswap): Likewise.
>
> Index: gcc/target.h
> ===================================================================
> --- gcc/target.h        2017-09-11 17:10:58.656085547 +0100
> +++ gcc/target.h        2017-09-14 11:25:32.162167193 +0100
> @@ -191,6 +191,14 @@ enum vect_cost_model_location {
>    vect_epilogue = 2
>  };
>
> +/* The type to use for vector permutes with a constant permute vector.
> +   Each entry is an index into the concatenated input vectors.  */
> +typedef vec<unsigned char> vec_perm_indices;
> +
> +/* Same, but can be used to construct local permute vectors that are
> +   automatically freed.  */
> +typedef auto_vec<unsigned char, 32> auto_vec_perm_indices;
> +
>  /* The target structure.  This holds all the backend hooks.  */
>  #define DEFHOOKPOD(NAME, DOC, TYPE, INIT) TYPE NAME;
>  #define DEFHOOK(NAME, DOC, TYPE, PARAMS, INIT) TYPE (* NAME) PARAMS;
> Index: gcc/optabs-query.h
> ===================================================================
> --- gcc/optabs-query.h  2017-08-30 12:14:51.272396735 +0100
> +++ gcc/optabs-query.h  2017-09-14 11:25:32.162167193 +0100
> @@ -21,6 +21,7 @@ the Free Software Foundation; either ver
>  #define GCC_OPTABS_QUERY_H
>
>  #include "insn-opinit.h"
> +#include "target.h"
>
>  /* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing
>     if the target does not have such an insn.  */
> @@ -165,7 +166,7 @@ enum insn_code can_extend_p (machine_mod
>  enum insn_code can_float_p (machine_mode, machine_mode, int);
>  enum insn_code can_fix_p (machine_mode, machine_mode, int, bool *);
>  bool can_conditionally_move_p (machine_mode mode);
> -bool can_vec_perm_p (machine_mode, bool, const unsigned char *);
> +bool can_vec_perm_p (machine_mode, bool, vec_perm_indices *);
>  enum insn_code widening_optab_handler (optab, machine_mode, machine_mode);
>  /* Find a widening optab even if it doesn't widen as much as we want.  */
>  #define find_widening_optab_handler(A,B,C,D) \
> Index: gcc/optabs-query.c
> ===================================================================
> --- gcc/optabs-query.c  2017-09-05 20:57:40.745898121 +0100
> +++ gcc/optabs-query.c  2017-09-14 11:25:32.162167193 +0100
> @@ -353,8 +353,7 @@ can_conditionally_move_p (machine_mode m
>     zeroes; this case is not dealt with here.  */
>
>  bool
> -can_vec_perm_p (machine_mode mode, bool variable,
> -               const unsigned char *sel)
> +can_vec_perm_p (machine_mode mode, bool variable, vec_perm_indices *sel)
>  {
>    machine_mode qimode;
>
> @@ -368,7 +367,7 @@ can_vec_perm_p (machine_mode mode, bool
>        if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing
>           && (sel == NULL
>               || targetm.vectorize.vec_perm_const_ok == NULL
> -             || targetm.vectorize.vec_perm_const_ok (mode, sel)))
> +             || targetm.vectorize.vec_perm_const_ok (mode, &(*sel)[0])))
>         return true;
>      }
>
> @@ -460,7 +459,6 @@ find_widening_optab_handler_and_mode (op
>  can_mult_highpart_p (machine_mode mode, bool uns_p)
>  {
>    optab op;
> -  unsigned char *sel;
>    unsigned i, nunits;
>
>    op = uns_p ? umul_highpart_optab : smul_highpart_optab;
> @@ -472,7 +470,6 @@ can_mult_highpart_p (machine_mode mode,
>      return 0;
>
>    nunits = GET_MODE_NUNITS (mode);
> -  sel = XALLOCAVEC (unsigned char, nunits);
>
>    op = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab;
>    if (optab_handler (op, mode) != CODE_FOR_nothing)
> @@ -480,9 +477,12 @@ can_mult_highpart_p (machine_mode mode,
>        op = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;
>        if (optab_handler (op, mode) != CODE_FOR_nothing)
>         {
> +         auto_vec_perm_indices sel (nunits);
>           for (i = 0; i < nunits; ++i)
> -           sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0);
> -         if (can_vec_perm_p (mode, false, sel))
> +           sel.quick_push (!BYTES_BIG_ENDIAN
> +                           + (i & ~1)
> +                           + ((i & 1) ? nunits : 0));
> +         if (can_vec_perm_p (mode, false, &sel))
>             return 2;
>         }
>      }
> @@ -493,9 +493,10 @@ can_mult_highpart_p (machine_mode mode,
>        op = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
>        if (optab_handler (op, mode) != CODE_FOR_nothing)
>         {
> +         auto_vec_perm_indices sel (nunits);
>           for (i = 0; i < nunits; ++i)
> -           sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
> -         if (can_vec_perm_p (mode, false, sel))
> +           sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1));
> +         if (can_vec_perm_p (mode, false, &sel))
>             return 3;
>         }
>      }
> Index: gcc/tree-ssa-forwprop.c
> ===================================================================
> --- gcc/tree-ssa-forwprop.c     2017-09-14 11:24:42.667010577 +0100
> +++ gcc/tree-ssa-forwprop.c     2017-09-14 11:25:32.163167193 +0100
> @@ -1952,7 +1952,6 @@ simplify_vector_constructor (gimple_stmt
>    unsigned elem_size, nelts, i;
>    enum tree_code code, conv_code;
>    constructor_elt *elt;
> -  unsigned char *sel;
>    bool maybe_ident;
>
>    gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR);
> @@ -1965,7 +1964,7 @@ simplify_vector_constructor (gimple_stmt
>    elem_type = TREE_TYPE (type);
>    elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
>
> -  sel = XALLOCAVEC (unsigned char, nelts);
> +  auto_vec_perm_indices sel (nelts);
>    orig = NULL;
>    conv_code = ERROR_MARK;
>    maybe_ident = true;
> @@ -2023,8 +2022,10 @@ simplify_vector_constructor (gimple_stmt
>         }
>        if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size)
>         return false;
> -      sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size;
> -      if (sel[i] != i) maybe_ident = false;
> +      unsigned int elt = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size;
> +      if (elt != i)
> +       maybe_ident = false;
> +      sel.quick_push (elt);
>      }
>    if (i < nelts)
>      return false;
> @@ -2053,7 +2054,7 @@ simplify_vector_constructor (gimple_stmt
>      {
>        tree mask_type;
>
> -      if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
> +      if (!can_vec_perm_p (TYPE_MODE (type), false, &sel))
>         return false;
>        mask_type
>         = build_vector_type (build_nonstandard_integer_type (elem_size, 1),
> Index: gcc/tree-vect-generic.c
> ===================================================================
> --- gcc/tree-vect-generic.c     2017-09-14 11:24:42.667010577 +0100
> +++ gcc/tree-vect-generic.c     2017-09-14 11:25:32.164167193 +0100
> @@ -1300,13 +1300,13 @@ lower_vec_perm (gimple_stmt_iterator *gs
>
>    if (TREE_CODE (mask) == VECTOR_CST)
>      {
> -      unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
> +      auto_vec_perm_indices sel_int (elements);
>
>        for (i = 0; i < elements; ++i)
> -       sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
> -                     & (2 * elements - 1));
> +       sel_int.quick_push (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
> +                           & (2 * elements - 1));
>
> -      if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
> +      if (can_vec_perm_p (TYPE_MODE (vect_type), false, &sel_int))
>         {
>           gimple_assign_set_rhs3 (stmt, mask);
>           update_stmt (stmt);
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> --- gcc/tree-vect-data-refs.c   2017-08-30 12:10:14.677681466 +0100
> +++ gcc/tree-vect-data-refs.c   2017-09-14 11:25:32.163167193 +0100
> @@ -4547,7 +4547,8 @@ vect_grouped_store_supported (tree vecty
>    if (VECTOR_MODE_P (mode))
>      {
>        unsigned int i, nelt = GET_MODE_NUNITS (mode);
> -      unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
> +      auto_vec_perm_indices sel (nelt);
> +      sel.quick_grow (nelt);
>
>        if (count == 3)
>         {
> @@ -4568,7 +4569,7 @@ vect_grouped_store_supported (tree vecty
>                   if (3 * i + nelt2 < nelt)
>                     sel[3 * i + nelt2] = 0;
>                 }
> -             if (!can_vec_perm_p (mode, false, sel))
> +             if (!can_vec_perm_p (mode, false, &sel))
>                 {
>                   if (dump_enabled_p ())
>                     dump_printf (MSG_MISSED_OPTIMIZATION,
> @@ -4585,7 +4586,7 @@ vect_grouped_store_supported (tree vecty
>                   if (3 * i + nelt2 < nelt)
>                     sel[3 * i + nelt2] = nelt + j2++;
>                 }
> -             if (!can_vec_perm_p (mode, false, sel))
> +             if (!can_vec_perm_p (mode, false, &sel))
>                 {
>                   if (dump_enabled_p ())
>                     dump_printf (MSG_MISSED_OPTIMIZATION,
> @@ -4605,13 +4606,13 @@ vect_grouped_store_supported (tree vecty
>               sel[i * 2] = i;
>               sel[i * 2 + 1] = i + nelt;
>             }
> -           if (can_vec_perm_p (mode, false, sel))
> -             {
> -               for (i = 0; i < nelt; i++)
> -                 sel[i] += nelt / 2;
> -               if (can_vec_perm_p (mode, false, sel))
> -                 return true;
> -             }
> +         if (can_vec_perm_p (mode, false, &sel))
> +           {
> +             for (i = 0; i < nelt; i++)
> +               sel[i] += nelt / 2;
> +             if (can_vec_perm_p (mode, false, &sel))
> +               return true;
> +           }
>         }
>      }
>
> @@ -4710,7 +4711,9 @@ vect_permute_store_chain (vec<tree> dr_c
>    tree perm3_mask_low, perm3_mask_high;
>    unsigned int i, n, log_length = exact_log2 (length);
>    unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
> -  unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
> +
> +  auto_vec_perm_indices sel (nelt);
> +  sel.quick_grow (nelt);
>
>    result_chain->quick_grow (length);
>    memcpy (result_chain->address (), dr_chain.address (),
> @@ -5132,7 +5135,8 @@ vect_grouped_load_supported (tree vectyp
>    if (VECTOR_MODE_P (mode))
>      {
>        unsigned int i, j, nelt = GET_MODE_NUNITS (mode);
> -      unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
> +      auto_vec_perm_indices sel (nelt);
> +      sel.quick_grow (nelt);
>
>        if (count == 3)
>         {
> @@ -5144,7 +5148,7 @@ vect_grouped_load_supported (tree vectyp
>                   sel[i] = 3 * i + k;
>                 else
>                   sel[i] = 0;
> -             if (!can_vec_perm_p (mode, false, sel))
> +             if (!can_vec_perm_p (mode, false, &sel))
>                 {
>                   if (dump_enabled_p ())
>                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -5157,7 +5161,7 @@ vect_grouped_load_supported (tree vectyp
>                   sel[i] = i;
>                 else
>                   sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
> -             if (!can_vec_perm_p (mode, false, sel))
> +             if (!can_vec_perm_p (mode, false, &sel))
>                 {
>                   if (dump_enabled_p ())
>                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -5174,11 +5178,11 @@ vect_grouped_load_supported (tree vectyp
>           gcc_assert (pow2p_hwi (count));
>           for (i = 0; i < nelt; i++)
>             sel[i] = i * 2;
> -         if (can_vec_perm_p (mode, false, sel))
> +         if (can_vec_perm_p (mode, false, &sel))
>             {
>               for (i = 0; i < nelt; i++)
>                 sel[i] = i * 2 + 1;
> -             if (can_vec_perm_p (mode, false, sel))
> +             if (can_vec_perm_p (mode, false, &sel))
>                 return true;
>             }
>          }
> @@ -5292,7 +5296,9 @@ vect_permute_load_chain (vec<tree> dr_ch
>    tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
>    unsigned int i, j, log_length = exact_log2 (length);
>    unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
> -  unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
> +
> +  auto_vec_perm_indices sel (nelt);
> +  sel.quick_grow (nelt);
>
>    result_chain->quick_grow (length);
>    memcpy (result_chain->address (), dr_chain.address (),
> @@ -5486,10 +5492,12 @@ vect_shift_permute_load_chain (vec<tree>
>    tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
>    unsigned int i;
>    unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
> -  unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
>    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
>    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
>
> +  auto_vec_perm_indices sel (nelt);
> +  sel.quick_grow (nelt);
> +
>    result_chain->quick_grow (length);
>    memcpy (result_chain->address (), dr_chain.address (),
>           length * sizeof (tree));
> @@ -5501,7 +5509,7 @@ vect_shift_permute_load_chain (vec<tree>
>         sel[i] = i * 2;
>        for (i = 0; i < nelt / 2; ++i)
>         sel[nelt / 2 + i] = i * 2 + 1;
> -      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
> +      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
>         {
>           if (dump_enabled_p ())
>             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -5515,7 +5523,7 @@ vect_shift_permute_load_chain (vec<tree>
>         sel[i] = i * 2 + 1;
>        for (i = 0; i < nelt / 2; ++i)
>         sel[nelt / 2 + i] = i * 2;
> -      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
> +      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
>         {
>           if (dump_enabled_p ())
>             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -5529,7 +5537,7 @@ vect_shift_permute_load_chain (vec<tree>
>          For vector length 8 it is {4 5 6 7 8 9 10 11}.  */
>        for (i = 0; i < nelt; i++)
>         sel[i] = nelt / 2 + i;
> -      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
> +      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
>         {
>           if (dump_enabled_p ())
>             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -5544,7 +5552,7 @@ vect_shift_permute_load_chain (vec<tree>
>         sel[i] = i;
>        for (i = nelt / 2; i < nelt; i++)
>         sel[i] = nelt + i;
> -      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
> +      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
>         {
>           if (dump_enabled_p ())
>             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -5607,7 +5615,7 @@ vect_shift_permute_load_chain (vec<tree>
>           sel[i] = 3 * k + (l % 3);
>           k++;
>         }
> -      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
> +      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
>         {
>           if (dump_enabled_p ())
>             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -5621,7 +5629,7 @@ vect_shift_permute_load_chain (vec<tree>
>          For vector length 8 it is {6 7 8 9 10 11 12 13}.  */
>        for (i = 0; i < nelt; i++)
>         sel[i] = 2 * (nelt / 3) + (nelt % 3) + i;
> -      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
> +      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
>         {
>           if (dump_enabled_p ())
>             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -5634,7 +5642,7 @@ vect_shift_permute_load_chain (vec<tree>
>          For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
>        for (i = 0; i < nelt; i++)
>         sel[i] = 2 * (nelt / 3) + 1 + i;
> -      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
> +      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
>         {
>           if (dump_enabled_p ())
>             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -5647,7 +5655,7 @@ vect_shift_permute_load_chain (vec<tree>
>          For vector length 8 it is {3 4 5 6 7 8 9 10}.  */
>        for (i = 0; i < nelt; i++)
>         sel[i] = (nelt / 3) + (nelt % 3) / 2 + i;
> -      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
> +      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
>         {
>           if (dump_enabled_p ())
>             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -5660,7 +5668,7 @@ vect_shift_permute_load_chain (vec<tree>
>          For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
>        for (i = 0; i < nelt; i++)
>         sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i;
> -      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
> +      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
>         {
>           if (dump_enabled_p ())
>             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c    2017-09-14 11:24:42.666088258 +0100
> +++ gcc/fold-const.c    2017-09-14 11:25:32.162167193 +0100
> @@ -8786,12 +8786,14 @@ vec_cst_ctor_to_array (tree arg, unsigne
>     NULL_TREE otherwise.  */
>
>  static tree
> -fold_vec_perm (tree type, tree arg0, tree arg1, const unsigned char *sel)
> +fold_vec_perm (tree type, tree arg0, tree arg1, vec_perm_indices sel)
>  {
> -  unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
> +  unsigned int i;
>    bool need_ctor = false;
>
> -  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts
> +  unsigned int nelts = sel.length ();
> +  gcc_assert (TYPE_VECTOR_SUBPARTS (type) == nelts
> +             && TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts
>               && TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)) == nelts);
>    if (TREE_TYPE (TREE_TYPE (arg0)) != TREE_TYPE (type)
>        || TREE_TYPE (TREE_TYPE (arg1)) != TREE_TYPE (type))
> @@ -11312,15 +11314,15 @@ fold_ternary_loc (location_t loc, enum t
>                   || TREE_CODE (arg2) == CONSTRUCTOR))
>             {
>               unsigned int nelts = VECTOR_CST_NELTS (arg0), i;
> -             unsigned char *sel = XALLOCAVEC (unsigned char, nelts);
>               gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type));
> +             auto_vec_perm_indices sel (nelts);
>               for (i = 0; i < nelts; i++)
>                 {
>                   tree val = VECTOR_CST_ELT (arg0, i);
>                   if (integer_all_onesp (val))
> -                   sel[i] = i;
> +                   sel.quick_push (i);
>                   else if (integer_zerop (val))
> -                   sel[i] = nelts + i;
> +                   sel.quick_push (nelts + i);
>                   else /* Currently unreachable.  */
>                     return NULL_TREE;
>                 }
> @@ -11643,8 +11645,6 @@ fold_ternary_loc (location_t loc, enum t
>        if (TREE_CODE (arg2) == VECTOR_CST)
>         {
>           unsigned int nelts = VECTOR_CST_NELTS (arg2), i, mask, mask2;
> -         unsigned char *sel = XALLOCAVEC (unsigned char, 2 * nelts);
> -         unsigned char *sel2 = sel + nelts;
>           bool need_mask_canon = false;
>           bool need_mask_canon2 = false;
>           bool all_in_vec0 = true;
> @@ -11656,6 +11656,8 @@ fold_ternary_loc (location_t loc, enum t
>           mask2 = 2 * nelts - 1;
>           mask = single_arg ? (nelts - 1) : mask2;
>           gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type));
> +         auto_vec_perm_indices sel (nelts);
> +         auto_vec_perm_indices sel2 (nelts);
>           for (i = 0; i < nelts; i++)
>             {
>               tree val = VECTOR_CST_ELT (arg2, i);
> @@ -11667,16 +11669,19 @@ fold_ternary_loc (location_t loc, enum t
>               wide_int t = val;
>               need_mask_canon |= wi::gtu_p (t, mask);
>               need_mask_canon2 |= wi::gtu_p (t, mask2);
> -             sel[i] = t.to_uhwi () & mask;
> -             sel2[i] = t.to_uhwi () & mask2;
> +             unsigned int elt = t.to_uhwi () & mask;
> +             unsigned int elt2 = t.to_uhwi () & mask2;
>
> -             if (sel[i] < nelts)
> +             if (elt < nelts)
>                 all_in_vec1 = false;
>               else
>                 all_in_vec0 = false;
>
> -             if ((sel[i] & (nelts-1)) != i)
> +             if ((elt & (nelts - 1)) != i)
>                 maybe_identity = false;
> +
> +             sel.quick_push (elt);
> +             sel2.quick_push (elt2);
>             }
>
>           if (maybe_identity)
> @@ -11714,8 +11719,8 @@ fold_ternary_loc (location_t loc, enum t
>              argument permutation while still allowing an equivalent
>              2-argument version.  */
>           if (need_mask_canon && arg2 == op2
> -             && !can_vec_perm_p (TYPE_MODE (type), false, sel)
> -             && can_vec_perm_p (TYPE_MODE (type), false, sel2))
> +             && !can_vec_perm_p (TYPE_MODE (type), false, &sel)
> +             && can_vec_perm_p (TYPE_MODE (type), false, &sel2))
>             {
>               need_mask_canon = need_mask_canon2;
>               sel = sel2;
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c        2017-09-14 11:24:42.667932896 +0100
> +++ gcc/tree-vect-loop.c        2017-09-14 11:25:32.164167193 +0100
> @@ -3698,15 +3698,15 @@ vect_estimate_min_profitable_iters (loop
>  }
>
>  /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
> -   vector elements (not bits) for a vector of mode MODE.  */
> +   vector elements (not bits) for a vector with NELT elements.  */
>  static void
> -calc_vec_perm_mask_for_shift (machine_mode mode, unsigned int offset,
> -                             unsigned char *sel)
> +calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt,
> +                             vec_perm_indices *sel)
>  {
> -  unsigned int i, nelt = GET_MODE_NUNITS (mode);
> +  unsigned int i;
>
>    for (i = 0; i < nelt; i++)
> -    sel[i] = (i + offset) & (2*nelt - 1);
> +    sel->quick_push ((i + offset) & (2 * nelt - 1));
>  }
>
>  /* Checks whether the target supports whole-vector shifts for vectors of mode
> @@ -3722,12 +3722,13 @@ have_whole_vector_shift (machine_mode mo
>      return false;
>
>    unsigned int i, nelt = GET_MODE_NUNITS (mode);
> -  unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
> +  auto_vec_perm_indices sel (nelt);
>
>    for (i = nelt/2; i >= 1; i/=2)
>      {
> -      calc_vec_perm_mask_for_shift (mode, i, sel);
> -      if (!can_vec_perm_p (mode, false, sel))
> +      sel.truncate (0);
> +      calc_vec_perm_mask_for_shift (i, nelt, &sel);
> +      if (!can_vec_perm_p (mode, false, &sel))
>         return false;
>      }
>    return true;
> @@ -5059,7 +5060,7 @@ vect_create_epilog_for_reduction (vec<tr
>        if (reduce_with_shift && !slp_reduc)
>          {
>            int nelements = vec_size_in_bits / element_bitsize;
> -          unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
> +          auto_vec_perm_indices sel (nelements);
>
>            int elt_offset;
>
> @@ -5083,8 +5084,9 @@ vect_create_epilog_for_reduction (vec<tr
>                 elt_offset >= 1;
>                 elt_offset /= 2)
>              {
> -              calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
> -              tree mask = vect_gen_perm_mask_any (vectype, sel);
> +             sel.truncate (0);
> +             calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel);
> +             tree mask = vect_gen_perm_mask_any (vectype, sel);
>               epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
>                                                  new_temp, zero_vec, mask);
>                new_name = make_ssa_name (vec_dest, epilog_stmt);
> Index: gcc/tree-vect-slp.c
> ===================================================================
> --- gcc/tree-vect-slp.c 2017-09-14 11:24:42.667932896 +0100
> +++ gcc/tree-vect-slp.c 2017-09-14 11:25:32.165167193 +0100
> @@ -873,15 +873,16 @@ vect_build_slp_tree_1 (vec_info *vinfo,
>    if (alt_stmt_code != ERROR_MARK
>        && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
>      {
> -      unsigned char *sel
> -       = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
> -      for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
> +      unsigned int count = TYPE_VECTOR_SUBPARTS (vectype);
> +      auto_vec_perm_indices sel (count);
> +      for (i = 0; i < count; ++i)
>         {
> -         sel[i] = i;
> +         unsigned int elt = i;
>           if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
> -           sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
> +           elt += count;
> +         sel.quick_push (elt);
>         }
> -      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
> +      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
>         {
>           for (i = 0; i < group_size; ++i)
>             if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
> @@ -3486,7 +3487,6 @@ vect_transform_slp_perm_load (slp_tree n
>    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
>    int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
>    int mask_element;
> -  unsigned char *mask;
>    machine_mode mode;
>
>    if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
> @@ -3502,7 +3502,8 @@ vect_transform_slp_perm_load (slp_tree n
>      (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
>    mask_type = get_vectype_for_scalar_type (mask_element_type);
>    nunits = TYPE_VECTOR_SUBPARTS (vectype);
> -  mask = XALLOCAVEC (unsigned char, nunits);
> +  auto_vec_perm_indices mask (nunits);
> +  mask.quick_grow (nunits);
>
>    /* Initialize the vect stmts of NODE to properly insert the generated
>       stmts later.  */
> @@ -3577,7 +3578,7 @@ vect_transform_slp_perm_load (slp_tree n
>           if (index == nunits)
>             {
>               if (! noop_p
> -                 && ! can_vec_perm_p (mode, false, mask))
> +                 && ! can_vec_perm_p (mode, false, &mask))
>                 {
>                   if (dump_enabled_p ())
>                     {
> @@ -3730,15 +3731,15 @@ vect_schedule_slp_instance (slp_tree nod
>        enum tree_code code0 = gimple_assign_rhs_code (stmt);
>        enum tree_code ocode = ERROR_MARK;
>        gimple *ostmt;
> -      unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
> +      auto_vec_perm_indices mask (group_size);
>        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
>         if (gimple_assign_rhs_code (ostmt) != code0)
>           {
> -           mask[i] = 1;
> +           mask.quick_push (1);
>             ocode = gimple_assign_rhs_code (ostmt);
>           }
>         else
> -         mask[i] = 0;
> +         mask.quick_push (0);
>        if (ocode != ERROR_MARK)
>         {
>           vec<gimple *> v0;
> Index: gcc/tree-vectorizer.h
> ===================================================================
> --- gcc/tree-vectorizer.h       2017-08-29 20:01:07.143372092 +0100
> +++ gcc/tree-vectorizer.h       2017-09-14 11:25:32.166167193 +0100
> @@ -1151,8 +1151,8 @@ extern void vect_get_load_cost (struct d
>  extern void vect_get_store_cost (struct data_reference *, int,
>                                  unsigned int *, stmt_vector_for_cost *);
>  extern bool vect_supportable_shift (enum tree_code, tree);
> -extern tree vect_gen_perm_mask_any (tree, const unsigned char *);
> -extern tree vect_gen_perm_mask_checked (tree, const unsigned char *);
> +extern tree vect_gen_perm_mask_any (tree, vec_perm_indices);
> +extern tree vect_gen_perm_mask_checked (tree, vec_perm_indices);
>  extern void optimize_mask_stores (struct loop*);
>
>  /* In tree-vect-data-refs.c.  */
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c       2017-09-14 11:24:42.668855214 +0100
> +++ gcc/tree-vect-stmts.c       2017-09-14 11:25:32.166167193 +0100
> @@ -1706,15 +1706,14 @@ compare_step_with_zero (gimple *stmt)
>  perm_mask_for_reverse (tree vectype)
>  {
>    int i, nunits;
> -  unsigned char *sel;
>
>    nunits = TYPE_VECTOR_SUBPARTS (vectype);
> -  sel = XALLOCAVEC (unsigned char, nunits);
>
> +  auto_vec_perm_indices sel (nunits);
>    for (i = 0; i < nunits; ++i)
> -    sel[i] = nunits - 1 - i;
> +    sel.quick_push (nunits - 1 - i);
>
> -  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
> +  if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
>      return NULL_TREE;
>    return vect_gen_perm_mask_checked (vectype, sel);
>  }
> @@ -2171,19 +2170,20 @@ vectorizable_mask_load_store (gimple *st
>         modifier = NONE;
>        else if (nunits == gather_off_nunits / 2)
>         {
> -         unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
>           modifier = WIDEN;
>
> +         auto_vec_perm_indices sel (gather_off_nunits);
>           for (i = 0; i < gather_off_nunits; ++i)
> -           sel[i] = i | nunits;
> +           sel.quick_push (i | nunits);
>
>           perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
>         }
>        else if (nunits == gather_off_nunits * 2)
>         {
> -         unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
>           modifier = NARROW;
>
> +         auto_vec_perm_indices sel (nunits);
> +         sel.quick_grow (nunits);
>           for (i = 0; i < nunits; ++i)
>             sel[i] = i < gather_off_nunits
>                      ? i : i + nunits - gather_off_nunits;
> @@ -2481,14 +2481,14 @@ vectorizable_bswap (gimple *stmt, gimple
>      return false;
>
>    unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
> -  unsigned char *elts = XALLOCAVEC (unsigned char, num_bytes);
> -  unsigned char *elt = elts;
>    unsigned word_bytes = num_bytes / nunits;
> +
> +  auto_vec_perm_indices elts (num_bytes);
>    for (unsigned i = 0; i < nunits; ++i)
>      for (unsigned j = 0; j < word_bytes; ++j)
> -      *elt++ = (i + 1) * word_bytes - j - 1;
> +      elts.quick_push ((i + 1) * word_bytes - j - 1);
>
> -  if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, elts))
> +  if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, &elts))
>      return false;
>
>    if (! vec_stmt)
> @@ -5803,22 +5803,22 @@ vectorizable_store (gimple *stmt, gimple
>         modifier = NONE;
>        else if (nunits == (unsigned int) scatter_off_nunits / 2)
>         {
> -         unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
>           modifier = WIDEN;
>
> +         auto_vec_perm_indices sel (scatter_off_nunits);
>           for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
> -           sel[i] = i | nunits;
> +           sel.quick_push (i | nunits);
>
>           perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
>           gcc_assert (perm_mask != NULL_TREE);
>         }
>        else if (nunits == (unsigned int) scatter_off_nunits * 2)
>         {
> -         unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
>           modifier = NARROW;
>
> +         auto_vec_perm_indices sel (nunits);
>           for (i = 0; i < (unsigned int) nunits; ++i)
> -           sel[i] = i | scatter_off_nunits;
> +           sel.quick_push (i | scatter_off_nunits);
>
>           perm_mask = vect_gen_perm_mask_checked (vectype, sel);
>           gcc_assert (perm_mask != NULL_TREE);
> @@ -6503,19 +6503,19 @@ vectorizable_store (gimple *stmt, gimple
>     vect_gen_perm_mask_checked.  */
>
>  tree
> -vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
> +vect_gen_perm_mask_any (tree vectype, vec_perm_indices sel)
>  {
>    tree mask_elt_type, mask_type, mask_vec;
> -  int i, nunits;
>
> -  nunits = TYPE_VECTOR_SUBPARTS (vectype);
> +  unsigned int nunits = sel.length ();
> +  gcc_checking_assert (nunits == TYPE_VECTOR_SUBPARTS (vectype));
>
>    mask_elt_type = lang_hooks.types.type_for_mode
>      (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
>    mask_type = get_vectype_for_scalar_type (mask_elt_type);
>
>    auto_vec<tree, 32> mask_elts (nunits);
> -  for (i = 0; i < nunits; ++i)
> +  for (unsigned int i = 0; i < nunits; ++i)
>      mask_elts.quick_push (build_int_cst (mask_elt_type, sel[i]));
>    mask_vec = build_vector (mask_type, mask_elts);
>
> @@ -6526,9 +6526,9 @@ vect_gen_perm_mask_any (tree vectype, co
>     i.e. that the target supports the pattern _for arbitrary input vectors_.  */
>
>  tree
> -vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
> +vect_gen_perm_mask_checked (tree vectype, vec_perm_indices sel)
>  {
> -  gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
> +  gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, &sel));
>    return vect_gen_perm_mask_any (vectype, sel);
>  }
>
> @@ -6841,22 +6841,22 @@ vectorizable_load (gimple *stmt, gimple_
>         modifier = NONE;
>        else if (nunits == gather_off_nunits / 2)
>         {
> -         unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
>           modifier = WIDEN;
>
> +         auto_vec_perm_indices sel (gather_off_nunits);
>           for (i = 0; i < gather_off_nunits; ++i)
> -           sel[i] = i | nunits;
> +           sel.quick_push (i | nunits);
>
>           perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
>         }
>        else if (nunits == gather_off_nunits * 2)
>         {
> -         unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
>           modifier = NARROW;
>
> +         auto_vec_perm_indices sel (nunits);
>           for (i = 0; i < nunits; ++i)
> -           sel[i] = i < gather_off_nunits
> -                    ? i : i + nunits - gather_off_nunits;
> +           sel.quick_push (i < gather_off_nunits
> +                           ? i : i + nunits - gather_off_nunits);
>
>           perm_mask = vect_gen_perm_mask_checked (vectype, sel);
>           ncopies *= 2;

Reply via email to