Richard Biener via Gcc-patches <[email protected]> writes:
> This removes the cost part of vect_worthwhile_without_simd_p, retaining
> only the correctness bits. The reason is that the cost heuristic
> do not properly account for SLP plus the check whether "without simd"
> applies misfires for AVX512 mask vectors at the moment, leading to
> missed vectorizations there.
>
> Any costing decision should take place in the cost modeling, no
> single stmt is to disable all vectorization on its own.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed to trunk.
>
> 2021-08-06 Richard Biener <[email protected]>
>
> PR tree-optimization/101801
> * tree-vectorizer.h (vect_worthwhile_without_simd_p): Rename...
> (vect_can_vectorize_without_simd_p): ... to this.
> * tree-vect-loop.c (vect_worthwhile_without_simd_p): Rename...
> (vect_can_vectorize_without_simd_p): ... to this and fold
> in vect_min_worthwhile_factor.
> (vect_min_worthwhile_factor): Remove.
> (vectorizable_reduction): Adjust and remove the cost part.
> * tree-vect-stmts.c (vectorizable_shift): Likewise.
> (vectorizable_operation): Likewise.
> ---
> gcc/tree-vect-loop.c | 43 +++++++------------------------------------
> gcc/tree-vect-stmts.c | 26 ++------------------------
> gcc/tree-vectorizer.h | 2 +-
> 3 files changed, 10 insertions(+), 61 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 1e21fe6b13d..37c7daa7f9e 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -7227,24 +7227,13 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
> if (dump_enabled_p ())
> dump_printf (MSG_NOTE, "op not supported by target.\n");
> if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
> - || !vect_worthwhile_without_simd_p (loop_vinfo, code))
> + || !vect_can_vectorize_without_simd_p (code))
> ok = false;
> else
> if (dump_enabled_p ())
> dump_printf (MSG_NOTE, "proceeding using word mode.\n");
> }
>
> - /* Worthwhile without SIMD support? */
> - if (ok
> - && !VECTOR_MODE_P (TYPE_MODE (vectype_in))
> - && !vect_worthwhile_without_simd_p (loop_vinfo, code))
> - {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "not worthwhile without SIMD support.\n");
> - ok = false;
> - }
> -
> /* lane-reducing operations have to go through
> vect_transform_reduction.
> For the other cases try without the single cycle optimization. */
> if (!ok)
> @@ -7948,46 +7937,28 @@ vectorizable_phi (vec_info *,
> }
>
>
> -/* Function vect_min_worthwhile_factor.
> +/* Return true if we can emulate CODE on an integer mode representation
> + of a vector. */
>
> - For a loop where we could vectorize the operation indicated by CODE,
> - return the minimum vectorization factor that makes it worthwhile
> - to use generic vectors. */
> -static unsigned int
> -vect_min_worthwhile_factor (enum tree_code code)
> +bool
> +vect_can_vectorize_without_simd_p (tree_code code)
> {
> switch (code)
> {
> case PLUS_EXPR:
> case MINUS_EXPR:
> case NEGATE_EXPR:
> - return 4;
> -
> case BIT_AND_EXPR:
> case BIT_IOR_EXPR:
> case BIT_XOR_EXPR:
> case BIT_NOT_EXPR:
> - return 2;
> + return true;
>
> default:
> - return INT_MAX;
> + return false;
> }
> }
>
> -/* Return true if VINFO indicates we are doing loop vectorization and if
> - it is worth decomposing CODE operations into scalar operations for
> - that loop's vectorization factor. */
> -
> -bool
> -vect_worthwhile_without_simd_p (vec_info *vinfo, tree_code code)
> -{
> - loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
> - unsigned HOST_WIDE_INT value;
> - return (loop_vinfo
> - && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&value)
> - && value >= vect_min_worthwhile_factor (code));
> -}
> -
> /* Function vectorizable_induction
>
> Check if STMT_INFO performs an induction computation that can be
> vectorized.
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 94bdb74ea8d..5b94d41e292 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -5685,24 +5685,13 @@ vectorizable_shift (vec_info *vinfo,
> /* Check only during analysis. */
> if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
> || (!vec_stmt
> - && !vect_worthwhile_without_simd_p (vinfo, code)))
> + && !vect_can_vectorize_without_simd_p (code)))
> return false;
> if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location,
> "proceeding using word mode.\n");
> }
>
> - /* Worthwhile without SIMD support? Check only during analysis. */
> - if (!vec_stmt
> - && !VECTOR_MODE_P (TYPE_MODE (vectype))
> - && !vect_worthwhile_without_simd_p (vinfo, code))
> - {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "not worthwhile without SIMD support.\n");
> - return false;
> - }
> -
> if (!vec_stmt) /* transformation not required. */
> {
> if (slp_node
It looks from vect_min_worthwhile_factor like this shift stuff
was/is effectively dead code. Maybe it started life as a copy of
vectorizable_operation or something?
Thanks,
Richard
> @@ -6094,24 +6083,13 @@ vectorizable_operation (vec_info *vinfo,
> "op not supported by target.\n");
> /* Check only during analysis. */
> if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
> - || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
> + || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
> return false;
> if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location,
> "proceeding using word mode.\n");
> }
>
> - /* Worthwhile without SIMD support? Check only during analysis. */
> - if (!VECTOR_MODE_P (vec_mode)
> - && !vec_stmt
> - && !vect_worthwhile_without_simd_p (vinfo, code))
> - {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "not worthwhile without SIMD support.\n");
> - return false;
> - }
> -
> int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
> vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) :
> NULL);
> internal_fn cond_fn = get_conditional_internal_fn (code);
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 5571b3cce3b..de0ecf86478 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2061,7 +2061,7 @@ extern bool vectorizable_lc_phi (loop_vec_info,
> stmt_vec_info,
> gimple **, slp_tree);
> extern bool vectorizable_phi (vec_info *, stmt_vec_info, gimple **, slp_tree,
> stmt_vector_for_cost *);
> -extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code);
> +extern bool vect_can_vectorize_without_simd_p (tree_code);
> extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
> stmt_vector_for_cost *,
> stmt_vector_for_cost *,