On Wed, 13 Apr 2022, Richard Biener wrote: > When doing BB vectorization the scalar cost compute is derailed > by patterns, causing lanes to be considered live and thus not > costed on the scalar side. For the testcase in PR104010 this > prevents vectorization which was done by GCC 11. PR103941 > shows similar cases of missed optimizations that are fixed by > this patch. > > Bootstrapped and tested on x86_64-unknown-linux-gnu. > > I'm only considering this now because PR104010 is identified > as regression on arm - Richards, what do you think? I do think > this will enable vectorization of more stuff now which might > be good or bad - who knowns, but at least it needs to involve > patterns. > > Thanks, > Richard. > > 2022-04-13 Richard Biener <rguent...@suse.de> > > PR tree-optimization/104010 > PR tree-optimization/103941 > * tree-vect-slp.cc (vect_bb_slp_scalar_cost): When > we run into stmts in patterns continue walking those > for uses outside of the vectorized region instead of > marking the lane live. > > * gcc.target/i386/pr103941-1.c: New testcase. > * gcc.target/i386/pr103941-2.c: Likewise. > --- > gcc/testsuite/gcc.target/i386/pr103941-1.c | 14 +++++++ > gcc/testsuite/gcc.target/i386/pr103941-2.c | 12 ++++++ > gcc/tree-vect-slp.cc | 47 ++++++++++++++++------ > 3 files changed, 61 insertions(+), 12 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr103941-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr103941-2.c > > diff --git a/gcc/testsuite/gcc.target/i386/pr103941-1.c > b/gcc/testsuite/gcc.target/i386/pr103941-1.c > new file mode 100644 > index 00000000000..524fdd0b4b1 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr103941-1.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msse2" } */ > + > +unsigned char ur[16], ua[16], ub[16]; > + > +void avgu_v2qi (void) > +{ > + int i; > + > + for (i = 0; i < 2; i++) > + ur[i] = (ua[i] + ub[i] + 1) >> 1; > +} > + > +/* { dg-final { scan-assembler "pavgb" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr103941-2.c > b/gcc/testsuite/gcc.target/i386/pr103941-2.c > new file mode 100644 > index 00000000000..972a32be997 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr103941-2.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msse2" } */ > + > +void foo (int *c, float *x, float *y) > +{ > + c[0] = x[0] < y[0]; > + c[1] = x[1] < y[1]; > + c[2] = x[2] < y[2]; > + c[3] = x[3] < y[3]; > +} > + > +/* { dg-final { scan-assembler "cmpltps" } } */ > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc > index 4ac2b70303c..c7687065374 100644 > --- a/gcc/tree-vect-slp.cc > +++ b/gcc/tree-vect-slp.cc > @@ -5185,22 +5185,45 @@ vect_bb_slp_scalar_cost (vec_info *vinfo, > the scalar cost. */ > if (!STMT_VINFO_LIVE_P (stmt_info)) > { > - FOR_EACH_PHI_OR_STMT_DEF (def_p, orig_stmt, op_iter, SSA_OP_DEF) > + auto_vec<gimple *, 8> worklist; > + hash_set<gimple *> *worklist_visited = NULL; > + worklist.quick_push (orig_stmt); > + do > { > - imm_use_iterator use_iter; > - gimple *use_stmt; > - FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p)) > - if (!is_gimple_debug (use_stmt)) > - { > - stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt); > - if (!use_stmt_info > - || !vectorized_scalar_stmts.contains (use_stmt_info)) > + gimple *work_stmt = worklist.pop (); > + FOR_EACH_PHI_OR_STMT_DEF (def_p, work_stmt, op_iter, SSA_OP_DEF) > + { > + imm_use_iterator use_iter; > + gimple *use_stmt; > + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, > + DEF_FROM_PTR (def_p)) > + if (!is_gimple_debug (use_stmt)) > { > - (*life)[i] = true; > - break; > + stmt_vec_info use_stmt_info > + = vinfo->lookup_stmt (use_stmt); > + if (!use_stmt_info > + || !vectorized_scalar_stmts.contains > (use_stmt_info)) > + { > + if (STMT_VINFO_IN_PATTERN_P (use_stmt_info))
Building SPEC 2017 reveals a missing use_stmt_info && here, consider that fixed. Richard.