https://gcc.gnu.org/g:be8d1a358e3abc50c14a1d7b1cfee82fe6f6aa3c
commit r15-5876-gbe8d1a358e3abc50c14a1d7b1cfee82fe6f6aa3c
Author: Richard Biener <rguent...@suse.de>
Date:   Mon Dec 2 14:59:00 2024 +0100

    tree-optimization/117874 - missed vectorization that's formerly hybrid

    With SLP forced we fail to consider using single-lane SLP for a case
    that we still end up discovering as hybrid (in the PR in question
    this is because we run into the SLP discovery limit due to excessive
    association).

            PR tree-optimization/117874
            * tree-vect-loop.cc (vect_analyze_loop_2): When non-SLP
            analysis fails, try single-lane SLP.

            * gcc.dg/vect/pr117874.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr117874.c | 50 ++++++++++++++++++++++++++++++++++++
 gcc/tree-vect-loop.cc                |  7 +++--
 2 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr117874.c b/gcc/testsuite/gcc.dg/vect/pr117874.c
new file mode 100644
index 000000000000..27e5f8ca3692
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr117874.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+typedef struct {
+  double real;
+  double imag;
+} complex;
+
+typedef struct { complex e[3][3]; } su3_matrix;
+
+void mult_su3_an(su3_matrix *a, su3_matrix *b, su3_matrix *c)
+{
+  int j;
+  double a0r,a0i,a1r,a1i,a2r,a2i;
+  double b0r,b0i,b1r,b1i,b2r,b2i;
+  for(j=0;j<3;j++)
+    {
+      a0r=a->e[0][0].real; a0i=a->e[0][0].imag;
+      b0r=b->e[0][j].real; b0i=b->e[0][j].imag;
+      a1r=a->e[1][0].real; a1i=a->e[1][0].imag;
+      b1r=b->e[1][j].real; b1i=b->e[1][j].imag;
+      a2r=a->e[2][0].real; a2i=a->e[2][0].imag;
+      b2r=b->e[2][j].real; b2i=b->e[2][j].imag;
+
+      c->e[0][j].real = a0r*b0r + a0i*b0i + a1r*b1r + a1i*b1i + a2r*b2r + a2i*b2i;
+      c->e[0][j].imag = a0r*b0i - a0i*b0r + a1r*b1i - a1i*b1r + a2r*b2i - a2i*b2r;
+
+      a0r=a->e[0][1].real; a0i=a->e[0][1].imag;
+      b0r=b->e[0][j].real; b0i=b->e[0][j].imag;
+      a1r=a->e[1][1].real; a1i=a->e[1][1].imag;
+      b1r=b->e[1][j].real; b1i=b->e[1][j].imag;
+      a2r=a->e[2][1].real; a2i=a->e[2][1].imag;
+      b2r=b->e[2][j].real; b2i=b->e[2][j].imag;
+
+      c->e[1][j].real = a0r*b0r + a0i*b0i + a1r*b1r + a1i*b1i + a2r*b2r + a2i*b2i;
+      c->e[1][j].imag = a0r*b0i - a0i*b0r + a1r*b1i - a1i*b1r + a2r*b2i - a2i*b2r;
+
+      a0r=a->e[0][2].real; a0i=a->e[0][2].imag;
+      b0r=b->e[0][j].real; b0i=b->e[0][j].imag;
+      a1r=a->e[1][2].real; a1i=a->e[1][2].imag;
+      b1r=b->e[1][j].real; b1i=b->e[1][j].imag;
+      a2r=a->e[2][2].real; a2i=a->e[2][2].imag;
+      b2r=b->e[2][j].real; b2i=b->e[2][j].imag;
+
+      c->e[2][j].real = a0r*b0r + a0i*b0i + a1r*b1r + a1i*b1i + a2r*b2r + a2i*b2i;
+      c->e[2][j].imag = a0r*b0i - a0i*b0r + a1r*b1i - a1i*b1r + a2r*b2i - a2i*b2r;
+    }
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target vect_hw_misalign } } } */
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 5a24fb8bf4c8..85209604486a 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -3005,10 +3005,9 @@ start_over:
   ok = vect_analyze_loop_operations (loop_vinfo);
   if (!ok)
     {
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                         "bad operation or unsupported loop bound.\n");
-      return ok;
+      ok = opt_result::failure_at (vect_location,
+                                   "bad operation or unsupported loop bound\n");
+      goto again;
     }
 
   /* For now, we don't expect to mix both masking and length approaches for one