On Mon, 17 Mar 2025, Tamar Christina wrote:
> Hi All,
>
> Broadly speaking, these tests were failing because the BB limitation for
> SLP'ing loads in an || in an early break makes the loads end up in different
> BBs, and so today we can't SLP them. This results in load_lanes being required
> to vectorize them, because the alternative is loads with permutes, which we
> don't allow.
>
> The original checks were only checking partial vectors, which ended up working
> because e.g. Adv. SIMD isn't a partial vector target, so it failed, and SVE
> is a partial vector target but also has load lanes, so it passed.
>
> GCN, however, is a partial vector target without load lanes, which makes the
> tests fail. As we require load_lanes for now, also check for them.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
> -m32, -m64 with no issues.
Does that reduce the number of PASSes on x86_64?
> Cross checked the failing cases on amdgcn-amdhsa
> and all pass now.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/testsuite/ChangeLog:
>
> PR target/119286
> * gcc.dg/vect/bb-slp-41.c: Add pragma novector.
> * gcc.dg/vect/vect-early-break_133_pfa11.c: Should never vectorize today
> as indexes can be out of range.
> * gcc.dg/vect/vect-early-break_128.c: Require load_lanes as well.
> * gcc.dg/vect/vect-early-break_133_pfa10.c: Likewise.
> * gcc.dg/vect/vect-early-break_133_pfa8.c: Likewise.
> * gcc.dg/vect/vect-early-break_133_pfa9.c: Likewise.
> * gcc.dg/vect/vect-early-break_22.c: Likewise.
> * gcc.dg/vect/vect-early-break_26.c: Likewise.
> * gcc.dg/vect/vect-early-break_43.c: Likewise.
> * gcc.dg/vect/vect-early-break_44.c: Likewise.
> * gcc.dg/vect/vect-early-break_6.c: Likewise.
> * gcc.dg/vect/vect-early-break_56.c: Expect failures on group misalign.
>
> ---
> diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-41.c
> b/gcc/testsuite/gcc.dg/vect/bb-slp-41.c
> index
> 72245115f305de8ef26c9c481f160a05db8c3dcb..5a2bd4d2a33e41dbb33dc105b72f0ae6fd0e81ef
> 100644
> --- a/gcc/testsuite/gcc.dg/vect/bb-slp-41.c
> +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-41.c
> @@ -51,6 +51,7 @@ int main ()
> foo (a1, b);
> bar (a2, b);
>
> +#pragma GCC novector
> for (i = 0; i < ARR_SIZE; i++)
> if (a1[i] != a2[i])
> return 1;
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_128.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_128.c
> index
> ed6baf2d451f3887076a1e9143035363128efe70..3d51d52358fc2308ca3614bd3c6fe079f9d839fa
> 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_128.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_128.c
> @@ -3,8 +3,8 @@
> /* { dg-require-effective-target vect_early_break } */
> /* { dg-require-effective-target vect_int } */
>
> -/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target
> vect_partial_vectors } } } */
> -/* { dg-final { scan-tree-dump-not "vectorizing stmts using SLP" "vect" {
> target { ! vect_partial_vectors } } } } */
> +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target
> { vect_partial_vectors && vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump-not "vectorizing stmts using SLP" "vect" {
> target { { ! vect_partial_vectors } || { ! vect_load_lanes } } } } } */
> /* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
>
> #ifndef N
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa10.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa10.c
> index
> dd05046982524f15662be8df517716b581b8a2d9..2a58fb01401282fa857ffd56b79fc6e25936d03e
> 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa10.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa10.c
> @@ -6,9 +6,9 @@
> /* { dg-additional-options "-Ofast" } */
>
> /* Alignment requirement too big, load lanes targets can't safely vectorize
> this. */
> -/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target {
> vect_partial_vectors || vect_load_lanes } } } } */
> -/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! {
> vect_partial_vectors || vect_load_lanes } } } } } */
> -/* { dg-final { scan-tree-dump-not "Alignment of access forced using
> peeling" "vect" { target { ! { vect_partial_vectors || vect_load_lanes } } }
> } } */
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target {
> vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { !
> vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump-not "Alignment of access forced using
> peeling" "vect" { target { ! vect_load_lanes } } } } */
>
> unsigned test4(char x, char *restrict vect_a, char *restrict vect_b, int n)
> {
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa11.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa11.c
> index
> 085dd9b81bb6943440f34d044cbd24ee2121657c..514bd37191744bf1df29c4967d52a9acee605205
> 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa11.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa11.c
> @@ -3,9 +3,8 @@
> /* { dg-require-effective-target vect_early_break } */
> /* { dg-require-effective-target vect_int } */
>
> -/* Gathers and scatters are not save to speculate across early breaks. */
> -/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { !
> vect_partial_vectors } } } } */
> -/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target
> vect_partial_vectors } } } */
> +/* Gathers and scatters are not safe to speculate across early breaks. */
> +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
>
> #define N 1024
> int vect_a[N];
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa8.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa8.c
> index
> 25d3a62356baf127c89187b150810e4d31567c6f..90e59986dea8ff7a52f16d7b89aa40e26e6b8b15
> 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa8.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa8.c
> @@ -5,8 +5,8 @@
>
> /* { dg-additional-options "-Ofast" } */
>
> -/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target
> vect_partial_vectors } } } */
> -/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { !
> vect_partial_vectors } } } } */
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target {
> vect_partial_vectors && vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { { !
> vect_partial_vectors } || { ! vect_load_lanes } } } } } */
>
> char vect_a[1025];
> char vect_b[1025];
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa9.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa9.c
> index
> 10eb98b726acb32a0d1de4daf202724995bfa1a6..fa2b32e9b8599e4bdfbffe7eaa1f9a80ede7a74c
> 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa9.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa9.c
> @@ -6,8 +6,8 @@
> /* { dg-additional-options "-Ofast" } */
>
> /* Group size is uneven and second group is misaligned. Needs partial
> vectors. */
> -/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target
> vect_partial_vectors } } } */
> -/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { !
> vect_partial_vectors } } } } */
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target {
> vect_partial_vectors && vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { { !
> vect_partial_vectors } || { ! vect_load_lanes } } } } } */
> /* { dg-final { scan-tree-dump-not "Alignment of access forced using
> peeling" "vect" } } */
>
>
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_22.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_22.c
> index
> f8f84fab97ab586847000af8b89448b0885ef5fc..8e91ac6b843bb8718d41f0c0ce5ec531039ec996
> 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_22.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_22.c
> @@ -43,5 +43,5 @@ main ()
> }
>
> /* This will fail because we cannot SLP the load groups yet. */
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2
> "vect" { target vect_partial_vectors } } } */
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1
> "vect" { target { ! vect_partial_vectors } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2
> "vect" { target { vect_partial_vectors && vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1
> "vect" { target { { ! vect_partial_vectors } || { ! vect_load_lanes } } } } }
> */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c
> index
> 643016b2ccfea29ba36d65c8070f255cb8179481..f6688fc8ead168bae473485c8ac43dc35c1ee0c0
> 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c
> @@ -42,5 +42,5 @@ main ()
> }
>
> /* This will fail because we cannot SLP the load groups yet. */
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2
> "vect" { target vect_partial_vectors } } } */
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1
> "vect" { target { ! vect_partial_vectors } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2
> "vect" { target { vect_partial_vectors && vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1
> "vect" { target { { ! vect_partial_vectors } || { ! vect_load_lanes } } } } }
> */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_43.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_43.c
> index
> 0cfa2428cc61d5f4ea0784367acea6436736970f..e6866deaa08f423db3f00240e7b4aaff69f38acb
> 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_43.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_43.c
> @@ -28,5 +28,5 @@ unsigned test4(unsigned x)
> }
>
> /* This will fail because we cannot SLP the load groups yet. */
> -/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" {
> target vect_partial_vectors } } } */
> -/* { dg-final { scan-tree-dump-not "vectorized 1 loops in function" "vect" {
> target { ! vect_partial_vectors } } } } */
> +/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" {
> target { vect_partial_vectors && vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump-not "vectorized 1 loops in function" "vect" {
> target { { ! vect_partial_vectors } || { ! vect_load_lanes } } } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_44.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_44.c
> index
> 0cfa2428cc61d5f4ea0784367acea6436736970f..e6866deaa08f423db3f00240e7b4aaff69f38acb
> 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_44.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_44.c
> @@ -28,5 +28,5 @@ unsigned test4(unsigned x)
> }
>
> /* This will fail because we cannot SLP the load groups yet. */
> -/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" {
> target vect_partial_vectors } } } */
> -/* { dg-final { scan-tree-dump-not "vectorized 1 loops in function" "vect" {
> target { ! vect_partial_vectors } } } } */
> +/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" {
> target { vect_partial_vectors && vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump-not "vectorized 1 loops in function" "vect" {
> target { { ! vect_partial_vectors } || { ! vect_load_lanes } } } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_56.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_56.c
> index
> b35e737fa3b9137cd745c14f7ad915a3f81c38c4..76a1f9b3f55c23100064e151a2bb831d04d5bdc1
> 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_56.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_56.c
> @@ -99,5 +99,5 @@ int main (void)
> return 0;
> }
>
> -/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 2 "vect" { xfail {
> vect_early_break && { ! vect_hw_misalign } } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail {
> vect_early_break && { ! vect_hw_misalign } } } } } */
>
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_6.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_6.c
> index
> c7cce81f52c80d83bd2c1face8cbd13f93834531..485cee63c63e772e5c134568e7c288ea260b69f5
> 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_6.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_6.c
> @@ -6,8 +6,8 @@
> /* { dg-additional-options "-Ofast" } */
>
> /* This will fail because we cannot SLP the load groups yet. */
> -/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target
> vect_partial_vectors } } } */
> -/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { !
> vect_partial_vectors } } } } */
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target {
> vect_partial_vectors && vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { { !
> vect_partial_vectors } || { ! vect_load_lanes } } } } } */
>
> #define N 1024
> unsigned vect_a[N];
>
>
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)