Richard Biener <rguent...@suse.de> writes: > This adds a missing check to epilogue reduction re-use, namely > that we can do hi/lo extracts from the vector when demoting it > to the epilogue vector size. > > I've chosen to add a can_vec_extract helper to optabs-query.h; > in the future we might want to simplify the vectorizer's life by > handling vector-from-vector extraction via BIT_FIELD_REFs during > RTL expansion via mode punning when the vec_extract is not > directly supported. > > I'm not 100% sure we can always do the punning of the > vec_extract result to a vector mode of the same size, but then > I'm also not sure how to check for that (the vectorizer doesn't check > in the other places where it does that at the moment, but I suppose we > eventually just go through memory there)? > > Bootstrap and regtest running on x86_64-unknown-linux-gnu. > > Does this look OK?
LGTM. I guess some of the existing optab checks could be simplified using the new helper, but that's a separate clean-up. Thanks, Richard > > Thanks, > Richard. > > 2022-02-09 Richard Biener <rguent...@suse.de> > > PR tree-optimization/104445 > PR tree-optimization/102832 > * optabs-query.h (can_vec_extract): New. > * optabs-query.cc (can_vec_extract): Likewise. > * tree-vect-loop.cc (vect_find_reusable_accumulator): Check > we can extract a hi/lo part from the larger vector. > > * gcc.dg/vect/pr104445.c: New testcase. > --- > gcc/optabs-query.cc | 33 ++++++++++++++++++++++++++++ > gcc/optabs-query.h | 1 + > gcc/testsuite/gcc.dg/vect/pr102832.c | 12 ++++++++++ > gcc/testsuite/gcc.dg/vect/pr104445.c | 16 ++++++++++++++ > gcc/tree-vect-loop.cc | 8 +++++-- > 5 files changed, 68 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/vect/pr102832.c > create mode 100644 gcc/testsuite/gcc.dg/vect/pr104445.c > > diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc > index 2ce8d74db16..fa88b4bede0 100644 > --- a/gcc/optabs-query.cc > +++ b/gcc/optabs-query.cc > @@ -763,3 +763,36 @@ supports_vec_scatter_store_p (machine_mode mode) > return this_fn_optabs->supports_vec_scatter_store[mode] > 0; > } > > +/* Whether we can extract part of the vector mode MODE as > + (scalar or vector) mode EXTR_MODE. */ > + > +bool > +can_vec_extract (machine_mode mode, machine_mode extr_mode) > +{ > + if (!VECTOR_MODE_P (mode)) > + return false; > + > + unsigned m; > + if (!constant_multiple_p (GET_MODE_SIZE (mode), > + GET_MODE_SIZE (extr_mode), &m)) > + return false; > + > + if (convert_optab_handler (vec_extract_optab, mode, extr_mode) > + != CODE_FOR_nothing) > + return true; > + > + if (!VECTOR_MODE_P (extr_mode)) > + return false; > + > + /* Besides a direct vec_extract we can also use an element extract from > + an integer vector mode with elements of the size of the extr_mode. 
*/ > + scalar_int_mode imode; > + machine_mode vmode; > + if (!int_mode_for_size (GET_MODE_SIZE (extr_mode), 0).exists (&imode) > + || !related_vector_mode (mode, imode, m).exists (&vmode) > + || (convert_optab_handler (vec_extract_optab, vmode, imode) > + == CODE_FOR_nothing)) > + return false; > + /* We assume we can pun mode to vmode and imode to extr_mode. */ > + return true; > +} > diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h > index 8b768c1797d..b9c9fd6f64d 100644 > --- a/gcc/optabs-query.h > +++ b/gcc/optabs-query.h > @@ -195,6 +195,7 @@ bool can_atomic_load_p (machine_mode); > bool lshift_cheap_p (bool); > bool supports_vec_gather_load_p (machine_mode = E_VOIDmode); > bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode); > +bool can_vec_extract (machine_mode, machine_mode); > > /* Version of find_widening_optab_handler_and_mode that operates on > specific mode types. */ > diff --git a/gcc/testsuite/gcc.dg/vect/pr102832.c > b/gcc/testsuite/gcc.dg/vect/pr102832.c > new file mode 100644 > index 00000000000..7cb4db5e4c7 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr102832.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-O3" } */ > +/* { dg-additional-options "-march=armv8.2-a+sve -msve-vector-bits=128" { > target aarch64-*-* } } */ > + > +int a, b; > +char c; > +signed char d(int e, int f) { return e - f; } > +void g() { > + a = 0; > + for (; a >= -17; a = d(a, 1)) > + c ^= b; > +} > diff --git a/gcc/testsuite/gcc.dg/vect/pr104445.c > b/gcc/testsuite/gcc.dg/vect/pr104445.c > new file mode 100644 > index 00000000000..8ec3b3b0f1e > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr104445.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-O3" } */ > +/* { dg-additional-options "-mavx -mno-mmx" { target x86_64-*-* i?86-*-* } } > */ > + > +signed char a; > +signed char f (int i, int j) > +{ > + signed char c; > + while (i != 0) > + { > + a ^= j; > + ++c; > + ++i; > + } > + return c; > 
+} > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > index 4860bfd3344..9916ae46460 100644 > --- a/gcc/tree-vect-loop.cc > +++ b/gcc/tree-vect-loop.cc > @@ -4997,7 +4997,8 @@ vect_find_reusable_accumulator (loop_vec_info > loop_vinfo, > if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (old_vectype), > TYPE_VECTOR_SUBPARTS (vectype), &m)) > return false; > - /* Check the intermediate vector types are available. */ > + /* Check the intermediate vector types and operations are available. */ > + tree prev_vectype = old_vectype; > while (m > 2) > { > m /= 2; > @@ -5006,8 +5007,11 @@ vect_find_reusable_accumulator (loop_vec_info > loop_vinfo, > exact_div (TYPE_VECTOR_SUBPARTS (old_vectype), m)); > if (!intermediate_vectype > || !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info), > - intermediate_vectype)) > + intermediate_vectype) > + || !can_vec_extract (TYPE_MODE (prev_vectype), > + TYPE_MODE (intermediate_vectype))) > return false; > + prev_vectype = intermediate_vectype; > } > > /* Non-SLP reductions might apply an adjustment after the reduction