https://gcc.gnu.org/g:be8d1a358e3abc50c14a1d7b1cfee82fe6f6aa3c

commit r15-5876-gbe8d1a358e3abc50c14a1d7b1cfee82fe6f6aa3c
Author: Richard Biener <rguent...@suse.de>
Date:   Mon Dec 2 14:59:00 2024 +0100

    tree-optimization/117874 - missed vectorization that's formerly hybrid
    
    With SLP forced we fail to consider using single-lane SLP for a case
    that we still end up discovering as hybrid (in the PR in question
    this is because we run into the SLP discovery limit due to excessive
    association).
    
            PR tree-optimization/117874
            * tree-vect-loop.cc (vect_analyze_loop_2): When non-SLP
            analysis fails, try single-lane SLP.
    
            * gcc.dg/vect/pr117874.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr117874.c | 50 ++++++++++++++++++++++++++++++++++++
 gcc/tree-vect-loop.cc                |  7 +++--
 2 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr117874.c b/gcc/testsuite/gcc.dg/vect/pr117874.c
new file mode 100644
index 000000000000..27e5f8ca3692
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr117874.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+typedef struct {
+    double real;
+    double imag;
+} complex;
+
+typedef struct { complex e[3][3]; } su3_matrix;
+
+void mult_su3_an(su3_matrix *a, su3_matrix *b, su3_matrix *c)
+{
+  int j;
+  double a0r,a0i,a1r,a1i,a2r,a2i;
+  double b0r,b0i,b1r,b1i,b2r,b2i;
+  for(j=0;j<3;j++)
+    {
+      a0r=a->e[0][0].real; a0i=a->e[0][0].imag;
+      b0r=b->e[0][j].real; b0i=b->e[0][j].imag;
+      a1r=a->e[1][0].real; a1i=a->e[1][0].imag;
+      b1r=b->e[1][j].real; b1i=b->e[1][j].imag;
+      a2r=a->e[2][0].real; a2i=a->e[2][0].imag;
+      b2r=b->e[2][j].real; b2i=b->e[2][j].imag;
+
+      c->e[0][j].real = a0r*b0r + a0i*b0i + a1r*b1r + a1i*b1i + a2r*b2r + a2i*b2i;
+      c->e[0][j].imag = a0r*b0i - a0i*b0r + a1r*b1i - a1i*b1r + a2r*b2i - a2i*b2r;
+
+      a0r=a->e[0][1].real; a0i=a->e[0][1].imag;
+      b0r=b->e[0][j].real; b0i=b->e[0][j].imag;
+      a1r=a->e[1][1].real; a1i=a->e[1][1].imag;
+      b1r=b->e[1][j].real; b1i=b->e[1][j].imag;
+      a2r=a->e[2][1].real; a2i=a->e[2][1].imag;
+      b2r=b->e[2][j].real; b2i=b->e[2][j].imag;
+
+      c->e[1][j].real = a0r*b0r + a0i*b0i + a1r*b1r + a1i*b1i + a2r*b2r + a2i*b2i;
+      c->e[1][j].imag = a0r*b0i - a0i*b0r + a1r*b1i - a1i*b1r + a2r*b2i - a2i*b2r;
+
+      a0r=a->e[0][2].real; a0i=a->e[0][2].imag;
+      b0r=b->e[0][j].real; b0i=b->e[0][j].imag;
+      a1r=a->e[1][2].real; a1i=a->e[1][2].imag;
+      b1r=b->e[1][j].real; b1i=b->e[1][j].imag;
+      a2r=a->e[2][2].real; a2i=a->e[2][2].imag;
+      b2r=b->e[2][j].real; b2i=b->e[2][j].imag;
+
+      c->e[2][j].real = a0r*b0r + a0i*b0i + a1r*b1r + a1i*b1i + a2r*b2r + a2i*b2i;
+      c->e[2][j].imag = a0r*b0i - a0i*b0r + a1r*b1i - a1i*b1r + a2r*b2i - a2i*b2r;
+    }
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target vect_hw_misalign } } } */
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 5a24fb8bf4c8..85209604486a 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -3005,10 +3005,9 @@ start_over:
   ok = vect_analyze_loop_operations (loop_vinfo);
   if (!ok)
     {
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                        "bad operation or unsupported loop bound.\n");
-      return ok;
+      ok = opt_result::failure_at (vect_location,
+                                  "bad operation or unsupported loop bound\n");
+      goto again;
     }
 
   /* For now, we don't expect to mix both masking and length approaches for one

Reply via email to