Richard Biener via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > This removes the cost part of vect_worthwhile_without_simd_p, retaining > only the correctness bits. The reason is that the cost heuristic > do not properly account for SLP plus the check whether "without simd" > applies misfires for AVX512 mask vectors at the moment, leading to > missed vectorizations there. > > Any costing decision should take place in the cost modeling, no > single stmt is to disable all vectorization on its own. > > Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed to trunk. > > 2021-08-06 Richard Biener <rguent...@suse.de> > > PR tree-optimization/101801 > * tree-vectorizer.h (vect_worthwhile_without_simd_p): Rename... > (vect_can_vectorize_without_simd_p): ... to this. > * tree-vect-loop.c (vect_worthwhile_without_simd_p): Rename... > (vect_can_vectorize_without_simd_p): ... to this and fold > in vect_min_worthwhile_factor. > (vect_min_worthwhile_factor): Remove. > (vectorizable_reduction): Adjust and remove the cost part. > * tree-vect-stmts.c (vectorizable_shift): Likewise. > (vectorizable_operation): Likewise. > --- > gcc/tree-vect-loop.c | 43 +++++++------------------------------------ > gcc/tree-vect-stmts.c | 26 ++------------------------ > gcc/tree-vectorizer.h | 2 +- > 3 files changed, 10 insertions(+), 61 deletions(-) > > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c > index 1e21fe6b13d..37c7daa7f9e 100644 > --- a/gcc/tree-vect-loop.c > +++ b/gcc/tree-vect-loop.c > @@ -7227,24 +7227,13 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > if (dump_enabled_p ()) > dump_printf (MSG_NOTE, "op not supported by target.\n"); > if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD) > - || !vect_worthwhile_without_simd_p (loop_vinfo, code)) > + || !vect_can_vectorize_without_simd_p (code)) > ok = false; > else > if (dump_enabled_p ()) > dump_printf (MSG_NOTE, "proceeding using word mode.\n"); > } > > - /* Worthwhile without SIMD support? */ > - if (ok > - && !VECTOR_MODE_P (TYPE_MODE (vectype_in)) > - && !vect_worthwhile_without_simd_p (loop_vinfo, code)) > - { > - if (dump_enabled_p ()) > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > - "not worthwhile without SIMD support.\n"); > - ok = false; > - } > - > /* lane-reducing operations have to go through > vect_transform_reduction. > For the other cases try without the single cycle optimization. */ > if (!ok) > @@ -7948,46 +7937,28 @@ vectorizable_phi (vec_info *, > } > > > -/* Function vect_min_worthwhile_factor. > +/* Return true if we can emulate CODE on an integer mode representation > + of a vector. */ > > - For a loop where we could vectorize the operation indicated by CODE, > - return the minimum vectorization factor that makes it worthwhile > - to use generic vectors. */ > -static unsigned int > -vect_min_worthwhile_factor (enum tree_code code) > +bool > +vect_can_vectorize_without_simd_p (tree_code code) > { > switch (code) > { > case PLUS_EXPR: > case MINUS_EXPR: > case NEGATE_EXPR: > - return 4; > - > case BIT_AND_EXPR: > case BIT_IOR_EXPR: > case BIT_XOR_EXPR: > case BIT_NOT_EXPR: > - return 2; > + return true; > > default: > - return INT_MAX; > + return false; > } > } > > -/* Return true if VINFO indicates we are doing loop vectorization and if > - it is worth decomposing CODE operations into scalar operations for > - that loop's vectorization factor. */ > - > -bool > -vect_worthwhile_without_simd_p (vec_info *vinfo, tree_code code) > -{ > - loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); > - unsigned HOST_WIDE_INT value; > - return (loop_vinfo > - && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&value) > - && value >= vect_min_worthwhile_factor (code)); > -} > - > /* Function vectorizable_induction > > Check if STMT_INFO performs an induction computation that can be > vectorized. > diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c > index 94bdb74ea8d..5b94d41e292 100644 > --- a/gcc/tree-vect-stmts.c > +++ b/gcc/tree-vect-stmts.c > @@ -5685,24 +5685,13 @@ vectorizable_shift (vec_info *vinfo, > /* Check only during analysis. */ > if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD) > || (!vec_stmt > - && !vect_worthwhile_without_simd_p (vinfo, code))) > + && !vect_can_vectorize_without_simd_p (code))) > return false; > if (dump_enabled_p ()) > dump_printf_loc (MSG_NOTE, vect_location, > "proceeding using word mode.\n"); > } > > - /* Worthwhile without SIMD support? Check only during analysis. */ > - if (!vec_stmt > - && !VECTOR_MODE_P (TYPE_MODE (vectype)) > - && !vect_worthwhile_without_simd_p (vinfo, code)) > - { > - if (dump_enabled_p ()) > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > - "not worthwhile without SIMD support.\n"); > - return false; > - } > - > if (!vec_stmt) /* transformation not required. */ > { > if (slp_node
It looks from vect_min_worthwhile_factor like this shift stuff was/is effectively dead code. Maybe it started life as a copy of vectorizable_operation or something? Thanks, Richard > @@ -6094,24 +6083,13 @@ vectorizable_operation (vec_info *vinfo, > "op not supported by target.\n"); > /* Check only during analysis. */ > if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD) > - || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code))) > + || (!vec_stmt && !vect_can_vectorize_without_simd_p (code))) > return false; > if (dump_enabled_p ()) > dump_printf_loc (MSG_NOTE, vect_location, > "proceeding using word mode.\n"); > } > > - /* Worthwhile without SIMD support? Check only during analysis. */ > - if (!VECTOR_MODE_P (vec_mode) > - && !vec_stmt > - && !vect_worthwhile_without_simd_p (vinfo, code)) > - { > - if (dump_enabled_p ()) > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > - "not worthwhile without SIMD support.\n"); > - return false; > - } > - > int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info); > vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : > NULL); > internal_fn cond_fn = get_conditional_internal_fn (code); > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index 5571b3cce3b..de0ecf86478 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -2061,7 +2061,7 @@ extern bool vectorizable_lc_phi (loop_vec_info, > stmt_vec_info, > gimple **, slp_tree); > extern bool vectorizable_phi (vec_info *, stmt_vec_info, gimple **, slp_tree, > stmt_vector_for_cost *); > -extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code); > +extern bool vect_can_vectorize_without_simd_p (tree_code); > extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, > stmt_vector_for_cost *, > stmt_vector_for_cost *,