On Tue, 3 Sep 2024, Tamar Christina wrote: > Hi All, > > Currently the vectorizer cheats when lowering COND_EXPR during bool recog. > In the cases where the conditonal is loop invariant or non-boolean it instead > converts the operation back into GENERIC and hides much of the operation from > the analysis part of the vectorizer. > > i.e. > > a ? b : c > > is transformed into: > > a != 0 ? b : c > > however by doing so we can't perform any optimization on the mask as they > aren't > explicit until quite late during codegen. > > To fix this this patch lowers booleans earlier and so ensures that we are > always > in GIMPLE. > > For when the value is a loop invariant boolean we have to generate an > additional > conversion from bool to the integer mask form. > > This is done by creating a loop invariant a ? -1 : 0 with the target mask > precision and then doing a normal != 0 comparison on that. > > To support this the patch also adds the ability to during pattern matching > create a loop invariant pattern that won't be seen by the vectorizer and will > instead me materialized inside the loop preheader in the case of loops, or in > the case of BB vectorization it materializes it in the first BB in the region. > > Bootstrapped Regtested on aarch64-none-linux-gnu, arm-none-linux-gnueabihf, > x86_64-pc-linux-gnu -m32, -m64 and no issues. > > Ok for master?
OK, but can you clarify a question below? > Thanks, > Tamar > > gcc/ChangeLog: > > * tree-vect-patterns.cc (append_inv_pattern_def_seq): New. > (vect_recog_bool_pattern): Lower COND_EXPRs. > * tree-vect-slp.cc (vect_schedule_slp): Materialize loop invariant > statements. > * tree-vect-loop.cc (vect_transform_loop): Likewise. > * tree-vect-stmts.cc (vectorizable_comparison_1): Remove > VECT_SCALAR_BOOLEAN_TYPE_P handling for vectype. > * tree-vectorizer.cc (vec_info::vec_info): Initialize > inv_pattern_def_seq. > * tree-vectorizer.h (LOOP_VINFO_INV_PATTERN_DEF_SEQ): New. > (class vec_info): Add inv_pattern_def_seq. > > gcc/testsuite/ChangeLog: > > * gcc.dg/vect/bb-slp-conditional_store_1.c: New test. > * gcc.dg/vect/vect-conditional_store_5.c: New test. > * gcc.dg/vect/vect-conditional_store_6.c: New test. > > --- > diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-conditional_store_1.c > b/gcc/testsuite/gcc.dg/vect/bb-slp-conditional_store_1.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..650a3bfbfb1dd44afc2d58bbe85f75f1d28b9bd0 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-conditional_store_1.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-require-effective-target vect_float } */ > + > +/* { dg-additional-options "-mavx2" { target avx2 } } */ > +/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */ > + > +void foo3 (float *restrict a, int *restrict c) > +{ > +#pragma GCC unroll 8 > + for (int i = 0; i < 8; i++) > + c[i] = a[i] > 1.0; > +} > + > +/* { dg-final { scan-tree-dump "vectorized using SLP" "slp1" } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_5.c > b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_5.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..37d60fa76351c13980427751be4450c14617a9a9 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_5.c > @@ -0,0 +1,28 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-require-effective-target vect_masked_store } */ > + > +/* { dg-additional-options "-mavx2" { target avx2 } } */ > +/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */ > + > +#include <stdbool.h> > + > +void foo3 (float *restrict a, int *restrict b, int *restrict c, int n, int > stride) > +{ > + if (stride <= 1) > + return; > + > + bool ai = a[0]; > + > + for (int i = 0; i < n; i++) > + { > + int res = c[i]; > + int t = b[i+stride]; > + if (ai) > + t = res; > + c[i] = t; > + } > +} > + > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ > +/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { target > aarch64-*-* } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_6.c > b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_6.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..5e1aedf3726b073c132bb64a9b474592ceb8e9b9 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_6.c > @@ -0,0 +1,24 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-require-effective-target vect_masked_store } */ > + > +/* { dg-additional-options "-mavx2" { target avx2 } } */ > +/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */ > + > +void foo3 (unsigned long long *restrict a, int *restrict b, int *restrict c, > int n, int stride) > +{ > + if (stride <= 1) > + return; > + > + for (int i = 0; i < n; i++) > + { > + int res = c[i]; > + int t = b[i+stride]; > + if (a[i]) > + t = res; > + c[i] = t; > + } > +} > + > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ > +/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { target > aarch64-*-* } } } */ > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > index > 6456220cdc9bb0ba35baf04c555060ea38d13bbc..e4bed1f88435cb6ebad5651a266a9c74106500c0 > 100644 > --- a/gcc/tree-vect-loop.cc > +++ b/gcc/tree-vect-loop.cc > @@ -12404,6 +12404,18 @@ vect_transform_loop (loop_vec_info loop_vinfo, > gimple *loop_vectorized_call) > vect_schedule_slp (loop_vinfo, LOOP_VINFO_SLP_INSTANCES (loop_vinfo)); > } > > + /* Generate the loop invariant statements. */ > + if (!gimple_seq_empty_p (LOOP_VINFO_INV_PATTERN_DEF_SEQ (loop_vinfo))) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_NOTE, vect_location, > + "------>generating loop invariant statements\n"); > + gimple_stmt_iterator gsi; > + gsi = gsi_after_labels (loop_preheader_edge (loop)->src); > + gsi_insert_seq_before (&gsi, LOOP_VINFO_INV_PATTERN_DEF_SEQ > (loop_vinfo), > + GSI_CONTINUE_LINKING); > + } > + So this is one instance ... > /* FORNOW: the vectorizer supports only loops which body consist > of one basic block (header + empty latch). When the vectorizer will > support more involved loop forms, the order by which the BBs are > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc > index > 4b112910df357e9f2783f7173b71812085126389..3aae6a066954d9e1e0d41803dd43307fa297af67 > 100644 > --- a/gcc/tree-vect-patterns.cc > +++ b/gcc/tree-vect-patterns.cc > @@ -182,6 +182,17 @@ append_pattern_def_seq (vec_info *vinfo, > new_stmt); > } > > + > +/* Add NEW_STMT to VINFO's invariant pattern definition statements. These > + statements are not vectorized but are materialized as scalar in the loop > + preheader. */ > + > +static inline void > +append_inv_pattern_def_seq (vec_info *vinfo, gimple *new_stmt) > +{ > + gimple_seq_add_stmt_without_update (&vinfo->inv_pattern_def_seq, new_stmt); > +} > + > /* The caller wants to perform new operations on vect_external variable > VAR, so that the result of the operations would also be vect_external. > Return the edge on which the operations can be performed, if one exists. > @@ -5983,12 +5994,34 @@ vect_recog_bool_pattern (vec_info *vinfo, > var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo); > else if (integer_type_for_mask (var, vinfo)) > return NULL; > + else if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE > + && !vect_get_internal_def (vinfo, var)) > + { > + /* If the condition is already a boolean then manually convert it to a > + mask of the given integer type but don't set a vectype. */ > + tree lhs_ivar = vect_recog_temp_ssa_var (type, NULL); > + pattern_stmt = gimple_build_assign (lhs_ivar, COND_EXPR, var, > + build_all_ones_cst (type), > + build_zero_cst (type)); > + append_inv_pattern_def_seq (vinfo, pattern_stmt); > + var = lhs_ivar; > + } > + > + tree lhs_var = vect_recog_temp_ssa_var (boolean_type_node, NULL); > + pattern_stmt = gimple_build_assign (lhs_var, NE_EXPR, var, > + build_zero_cst (TREE_TYPE (var))); > + > + tree new_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE > (var)); > + if (!new_vectype) > + return NULL; > + > + new_vectype = truth_type_for (new_vectype); > + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype, > + TREE_TYPE (var)); > > lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); > pattern_stmt > - = gimple_build_assign (lhs, COND_EXPR, > - build2 (NE_EXPR, boolean_type_node, > - var, build_int_cst (TREE_TYPE (var), 0)), > + = gimple_build_assign (lhs, COND_EXPR, lhs_var, > gimple_assign_rhs2 (last_stmt), > gimple_assign_rhs3 (last_stmt)); > *type_out = vectype; > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc > index > 43ecd2689701451b706b41d73ba60773af4cf8a5..1ea836ca8fe1d4867504c6da42a40c22b6638385 > 100644 > --- a/gcc/tree-vect-slp.cc > +++ b/gcc/tree-vect-slp.cc > @@ -10393,4 +10393,23 @@ vect_schedule_slp (vec_info *vinfo, const > vec<slp_instance> &slp_instances) > SLP_TREE_REPRESENTATIVE (root) = NULL; > } > } > + > + /* Generate the invariant statements. */ > + if (!gimple_seq_empty_p (vinfo->inv_pattern_def_seq)) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_NOTE, vect_location, > + "------>generating invariant statements\n"); > + gimple_stmt_iterator gsi; > + basic_block bb_inv = vinfo->bbs[0]; > + loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo); > + if (loop_vinfo) > + bb_inv = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo))->src; > + > + gsi = gsi_after_labels (bb_inv); > + gsi_insert_seq_before (&gsi, vinfo->inv_pattern_def_seq, > + GSI_CONTINUE_LINKING); > + vinfo->inv_pattern_def_seq = NULL; > + } ... and this another. This one is only required for BB vectorization? In that context it should probably best be done from vect_slp_region before the vect_schedule_slp call or at least guarded with BB vect? I suspect you ran into this only for BB vectorization from loop vect as that runs on if-converted code? Thanks for clarifying. Richard. > + > } > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > index > 385e63163c2450b1cee9f3c2e5df11636116ec2d..c761360d2e80bbf25f55c00ee69bbadfcc9128e1 > 100644 > --- a/gcc/tree-vect-stmts.cc > +++ b/gcc/tree-vect-stmts.cc > @@ -12652,11 +12652,7 @@ vectorizable_comparison_1 (vec_info *vinfo, tree > vectype, > /* Invariant comparison. */ > if (!vectype) > { > - if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))) > - vectype = mask_type; > - else > - vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), > - slp_node); > + vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), > slp_node); > if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits)) > return false; > } > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index > df6c8ada2f7814ac1ea89913e881dd659bd2da62..d3389767325229e953212236c248c30697bbce0d > 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -509,6 +509,12 @@ public: > /* The count of the basic blocks in the vectorization region. */ > unsigned int nbbs; > > + /* Used to keep a sequence of def stmts of a pattern stmt that are loop > + invariant if they exists. > + The sequence is emitted in the loop preheader should the loop be > vectorized > + and are reset when undoing patterns. */ > + gimple_seq inv_pattern_def_seq; > + > private: > stmt_vec_info new_stmt_vec_info (gimple *stmt); > void set_vinfo_for_stmt (gimple *, stmt_vec_info, bool = true); > @@ -1039,6 +1045,7 @@ public: > #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info > #define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond > #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor > +#define LOOP_VINFO_INV_PATTERN_DEF_SEQ(L) (L)->inv_pattern_def_seq > > #define LOOP_VINFO_FULLY_MASKED_P(L) \ > (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L) \ > diff --git a/gcc/tree-vectorizer.cc b/gcc/tree-vectorizer.cc > index > 1fb4fb36ed44d11baea2d3db8eedf9685be344e8..70b83720fe28b8c8c567f68df1ceb308d49043dc > 100644 > --- a/gcc/tree-vectorizer.cc > +++ b/gcc/tree-vectorizer.cc > @@ -465,7 +465,8 @@ vec_info::vec_info (vec_info::vec_kind kind_in, > vec_info_shared *shared_) > shared (shared_), > stmt_vec_info_ro (false), > bbs (NULL), > - nbbs (0) > + nbbs (0), > + inv_pattern_def_seq (NULL) > { > stmt_vec_infos.create (50); > } > > > > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)