Richard Biener <[email protected]> 于2024年5月28日周二 17:47写道:
>
> The following avoids accounting single-lane SLP to the discovery
> limit. As the two testcases show, accounting it makes discovery fail,
> and unfortunately not even consistently across targets. The following
> should fix two FAILs for GCN as a side-effect.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
>
> PR tree-optimization/115254
> * tree-vect-slp.cc (vect_build_slp_tree): Only account
> multi-lane SLP to limit.
>
> * gcc.dg/vect/slp-cond-2-big-array.c: Expect 4 times SLP.
> * gcc.dg/vect/slp-cond-2.c: Likewise.
With this patch applied, MIPS/MSA still reports "vectorizing stmts using SLP"
only 3 times instead of the expected 4. I am looking into the problem.
> ---
> .../gcc.dg/vect/slp-cond-2-big-array.c | 2 +-
> gcc/testsuite/gcc.dg/vect/slp-cond-2.c | 2 +-
> gcc/tree-vect-slp.cc | 31 +++++++++++--------
> 3 files changed, 20 insertions(+), 15 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c
> b/gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c
> index cb7eb94b3a3..9a9f63c0b8d 100644
> --- a/gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c
> +++ b/gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c
> @@ -128,4 +128,4 @@ main ()
> return 0;
> }
>
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect"
> } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect"
> } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-cond-2.c
> b/gcc/testsuite/gcc.dg/vect/slp-cond-2.c
> index 1dcee46cd95..08bbb3dbec6 100644
> --- a/gcc/testsuite/gcc.dg/vect/slp-cond-2.c
> +++ b/gcc/testsuite/gcc.dg/vect/slp-cond-2.c
> @@ -128,4 +128,4 @@ main ()
> return 0;
> }
>
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect"
> } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect"
> } } */
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 0dd9a4daf6a..bbfde8849c1 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -1725,21 +1725,26 @@ vect_build_slp_tree (vec_info *vinfo,
> SLP_TREE_SCALAR_STMTS (res) = stmts;
> bst_map->put (stmts.copy (), res);
>
> - if (*limit == 0)
> + /* Single-lane SLP doesn't have the chance of run-away, do not account
> + it to the limit. */
> + if (stmts.length () > 1)
> {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_NOTE, vect_location,
> - "SLP discovery limit exceeded\n");
> - /* Mark the node invalid so we can detect those when still in use
> - as backedge destinations. */
> - SLP_TREE_SCALAR_STMTS (res) = vNULL;
> - SLP_TREE_DEF_TYPE (res) = vect_uninitialized_def;
> - res->failed = XNEWVEC (bool, group_size);
> - memset (res->failed, 0, sizeof (bool) * group_size);
> - memset (matches, 0, sizeof (bool) * group_size);
> - return NULL;
> + if (*limit == 0)
> + {
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location,
> + "SLP discovery limit exceeded\n");
> + /* Mark the node invalid so we can detect those when still in use
> + as backedge destinations. */
> + SLP_TREE_SCALAR_STMTS (res) = vNULL;
> + SLP_TREE_DEF_TYPE (res) = vect_uninitialized_def;
> + res->failed = XNEWVEC (bool, group_size);
> + memset (res->failed, 0, sizeof (bool) * group_size);
> + memset (matches, 0, sizeof (bool) * group_size);
> + return NULL;
> + }
> + --*limit;
> }
> - --*limit;
>
> if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location,
> --
> 2.35.3
--
YunQiang Su