On Thu, Jun 27, 2024 at 5:15 PM Feng Xue OS <f...@os.amperecomputing.com> wrote: > > I added two test cases for the examples your mentioned.
OK, thanks. > BTW: would you please look over another 3 lane-reducing patches that have > been updated? If ok, I would consider to check them in. Sorry, I've been distracted by other things the last two weeks - it's on my list to look at. Richard. > Thanks, > Feng > > -- > > Allow shift-by-induction for slp node, when it is single lane, which is > aligned with the original loop-based handling. > > gcc/ > * tree-vect-stmts.cc (vectorizable_shift): Allow shift-by-induction > for single-lane slp node. > > gcc/testsuite/ > * gcc.dg/vect/vect-shift-6.c > * gcc.dg/vect/vect-shift-7.c > --- > gcc/testsuite/gcc.dg/vect/vect-shift-6.c | 51 +++++++++++++++++++ > gcc/testsuite/gcc.dg/vect/vect-shift-7.c | 65 ++++++++++++++++++++++++ > gcc/tree-vect-stmts.cc | 2 +- > 3 files changed, 117 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.dg/vect/vect-shift-6.c > create mode 100644 gcc/testsuite/gcc.dg/vect/vect-shift-7.c > > diff --git a/gcc/testsuite/gcc.dg/vect/vect-shift-6.c > b/gcc/testsuite/gcc.dg/vect/vect-shift-6.c > new file mode 100644 > index 00000000000..940f7f2a4db > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-shift-6.c > @@ -0,0 +1,51 @@ > +/* { dg-require-effective-target vect_shift } */ > +/* { dg-require-effective-target vect_int } */ > + > +#include <stdarg.h> > +#include "tree-vect.h" > + > +#define N 32 > + > +int A[N]; > +int B[N]; > + > +#define FN(name) \ > +__attribute__((noipa)) \ > +void name(int *a) \ > +{ \ > + for (int i = 0; i < N / 2; i++) \ > + { \ > + a[2 * i + 0] <<= i; \ > + a[2 * i + 1] <<= i; \ > + } \ > +} > + > + > +FN(foo_vec) > + > +#pragma GCC push_options > +#pragma GCC optimize ("O0") > +FN(foo_novec) > +#pragma GCC pop_options > + > +int main () > +{ > + int i; > + > + check_vect (); > + > +#pragma GCC novector > + for (i = 0; i < N; i++) > + A[i] = B[i] = -(i + 1); > + > + foo_vec(A); > + foo_novec(B); > + > + /* check results: */ > +#pragma GCC novector > + for (i = 0; i < N; i++) > + if (A[i] != B[i]) > + abort (); > + > + return 0; > +} > diff --git a/gcc/testsuite/gcc.dg/vect/vect-shift-7.c > b/gcc/testsuite/gcc.dg/vect/vect-shift-7.c > new file mode 100644 > index 00000000000..a33b120343b > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-shift-7.c > @@ -0,0 +1,65 @@ > +/* { dg-require-effective-target vect_shift } */ > +/* { dg-require-effective-target vect_int } */ > + > +#include <stdarg.h> > +#include "tree-vect.h" > + > +#define N 32 > +#define M 64 > + > +int A[N]; > +int B[N]; > + > +#define FN(name) \ > +__attribute__((noipa)) \ > +void name(int *a) \ > +{ \ > + for (int i = 0; i < N / 2; i++) \ > + { \ > + int s1 = i; \ > + int s2 = s1 + 1; \ > + int r1 = 0; \ > + int r2 = 1; \ > + \ > + for (int j = 0; j < M; j++) \ > + { \ > + r1 += j << s1; \ > + r2 += j << s2; \ > + s1++; \ > + s2++; \ > + } \ > + \ > + a[2 * i + 0] = r1; \ > + a[2 * i + 1] = r2; \ > + } \ > +} > + > + > +FN(foo_vec) > + > +#pragma GCC push_options > +#pragma GCC optimize ("O0") > +FN(foo_novec) > +#pragma GCC pop_options > + > +int main () > +{ > + int i; > + > + check_vect (); > + > +#pragma GCC novector > + for (i = 0; i < N; i++) > + A[i] = B[i] = 0; > + > + foo_vec(A); > + foo_novec(B); > + > + /* check results: */ > +#pragma GCC novector > + for (i = 0; i < N; i++) > + if (A[i] != B[i]) > + abort (); > + > + return 0; > +} > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > index ca6052662a3..840e162c7f0 100644 > --- a/gcc/tree-vect-stmts.cc > +++ b/gcc/tree-vect-stmts.cc > @@ -6247,7 +6247,7 @@ vectorizable_shift (vec_info *vinfo, > if ((dt[1] == vect_internal_def > || dt[1] == vect_induction_def > || dt[1] == vect_nested_cycle) > - && !slp_node) > + && (!slp_node || SLP_TREE_LANES (slp_node) == 1)) > scalar_shift_arg = false; > else if (dt[1] == vect_constant_def > || dt[1] == vect_external_def > -- > 2.17.1 > > ________________________________________ > From: Richard Biener <richard.guent...@gmail.com> > Sent: Thursday, June 27, 2024 12:49 AM > To: Feng Xue OS > Cc: gcc-patches@gcc.gnu.org > Subject: Re: [PATCH] vect: Fix shift-by-induction for single-lane slp > > On Wed, Jun 26, 2024 at 4:58 PM Feng Xue OS <f...@os.amperecomputing.com> > wrote: > > > > Allow shift-by-induction for slp node, when it is single lane, which is > > aligned with the original loop-based handling. > > OK. > > Did you try whether we handle multiple lanes correctly? The simplest > case would be a loop > body with say > > a[2*i] = x << i; > a[2*i+1] = x << i; > > I'm not sure how we match up multiple (different) inductions in the > same SLP node, > but one node could be x << (i + 1). > > Note you enable a nested cycle def the same way, I think that could be > treated like > an internal def and also generally. There's probably no test coverage > for that though. > Sth like > > for (m ...) > { > i = m; > j = i + 1; > for (k ...) > { > res1 += k << i; > res2 += k << j; > i++; > j++; > } > a[2*m] = res1; > a[2*m+1] = res2; > } > > Thanks, > Richard. > > > Thanks, > > Feng > > > > --- > > gcc/tree-vect-stmts.cc | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > > index ca6052662a3..840e162c7f0 100644 > > --- a/gcc/tree-vect-stmts.cc > > +++ b/gcc/tree-vect-stmts.cc > > @@ -6247,7 +6247,7 @@ vectorizable_shift (vec_info *vinfo, > > if ((dt[1] == vect_internal_def > > || dt[1] == vect_induction_def > > || dt[1] == vect_nested_cycle) > > - && !slp_node) > > + && (!slp_node || SLP_TREE_LANES (slp_node) == 1)) > > scalar_shift_arg = false; > > else if (dt[1] == vect_constant_def > > || dt[1] == vect_external_def > > -- > > 2.17.1