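Hi! If there are exactly 2 complex loads and some other (non-complex) loads in an SLP instance, we sometimes miscompile things, because vect_supported_load_permutation_p takes its complex_numbers == 2 shortcut and skips the permutation tests that are still needed when other loads are present.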
Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/4.7? 2012-05-21 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/53366 * tree-vect-slp.c (vect_supported_load_permutation_p): Don't shortcut tests if complex_numbers == 2, but there are non-complex number loads too. * gcc.dg/torture/pr53366-1.c: New test. * gcc.dg/torture/pr53366-2.c: New test. * gcc.target/i386/pr53366-1.c: New test. * gcc.target/i386/pr53366-2.c: New test. --- gcc/tree-vect-slp.c.jj 2012-04-19 11:09:13.000000000 +0200 +++ gcc/tree-vect-slp.c 2012-05-21 12:46:40.078674688 +0200 @@ -1199,7 +1199,8 @@ vect_supported_load_permutation_p (slp_i /* We checked that this case ok, so there is no need to proceed with permutation tests. */ - if (complex_numbers == 2) + if (complex_numbers == 2 + && VEC_length (slp_tree, SLP_INSTANCE_LOADS (slp_instn)) == 2) { VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn)); VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn)); --- gcc/testsuite/gcc.dg/torture/pr53366-1.c.jj 2012-05-21 12:55:47.220474343 +0200 +++ gcc/testsuite/gcc.dg/torture/pr53366-1.c 2012-05-21 12:53:40.000000000 +0200 @@ -0,0 +1,70 @@ +/* PR tree-optimization/53366 */ +/* { dg-do run } */ + +extern void abort (void); + +struct S { double v[3]; }; +struct T { struct S r, i; }; +struct U { struct T j[5]; }; + +void +foo (struct U *__restrict p1, struct U *__restrict p2, + struct S l1, struct S l2, struct S l3, struct S l4, + const double _Complex * __restrict x, int y, int z) +{ + int i, j; + while (y < z - 2) + { + for (j = 0; j < 5; ++j) + { + double a = __real__ x[5 * y + j]; + double b = __imag__ x[5 * y + j]; + double c = __real__ x[5 * (y + 2) + j]; + double d = __imag__ x[5 * (y + 2) + j]; + double e = __real__ x[5 * (y + 1) + j]; + double f = __imag__ x[5 * (y + 1) + j]; + double g = __real__ x[5 * (y + 3) + j]; + double h = __imag__ x[5 * (y + 3) + j]; + for (i = 0; i < 3; ++i) + { + p1->j[j].r.v[i] += l2.v[i] * a; + p1->j[j].r.v[i] += 
l4.v[i] * c; + p1->j[j].i.v[i] += l2.v[i] * b; + p1->j[j].i.v[i] += l4.v[i] * d; + p2->j[j].r.v[i] += l3.v[i] * e; + p2->j[j].r.v[i] += l1.v[i] * g; + p2->j[j].i.v[i] += l3.v[i] * f; + p2->j[j].i.v[i] += l1.v[i] * h; + } + } + y += 4; + } +} + +_Complex double x[5005]; +struct U p1, p2; + +int +main () +{ + int i, j; + struct S l1, l2, l3, l4; + for (i = 0; i < 5005; ++i) + x[i] = i + 1.0iF * (2 * i); + for (i = 0; i < 3; ++i) + { + l1.v[i] = 1; + l2.v[i] = 2; + l3.v[i] = 3; + l4.v[i] = 4; + } + foo (&p1, &p2, l1, l2, l3, l4, x, 5, 1000); + for (j = 0; j < 5; ++j) + for (i = 0; i < 3; ++i) + if (p1.j[j].r.v[i] != 3752430 + j * 1494.0 + || p1.j[j].i.v[i] != p1.j[j].r.v[i] * 2 + || p2.j[j].r.v[i] != 2502450 + j * 996.0 + || p2.j[j].i.v[i] != p2.j[j].r.v[i] * 2) + abort (); + return 0; +} --- gcc/testsuite/gcc.dg/torture/pr53366-2.c.jj 2012-05-21 12:55:50.011459264 +0200 +++ gcc/testsuite/gcc.dg/torture/pr53366-2.c 2012-05-21 12:54:48.000000000 +0200 @@ -0,0 +1,43 @@ +/* PR tree-optimization/53366 */ +/* { dg-do run } */ + +extern void abort (void); + +struct T { float r[3], i[3]; }; +struct U { struct T j[2]; }; + +void __attribute__ ((noinline)) +foo (struct U *__restrict y, const float _Complex *__restrict x) +{ + int i, j; + for (j = 0; j < 2; ++j) + { + float a = __real__ x[j]; + float b = __imag__ x[j]; + float c = __real__ x[j + 2]; + float d = __imag__ x[j + 2]; + for (i = 0; i < 3; ++i) + { + y->j[j].r[i] = y->j[j].r[i] + a + c; + y->j[j].i[i] = y->j[j].i[i] + b + d; + } + } +} + +_Complex float x[4]; +struct U y; + +int +main () +{ + int i, j; + for (i = 0; i < 4; ++i) + x[i] = i + 1.0iF * (2 * i); + foo (&y, x); + for (j = 0; j < 2; ++j) + for (i = 0; i < 3; ++i) + if (y.j[j].r[i] != __real__ (x[j] + x[j + 2]) + || y.j[j].i[i] != __imag__ (x[j] + x[j + 2])) + __builtin_abort (); + return 0; +} --- gcc/testsuite/gcc.target/i386/pr53366-1.c.jj 2012-05-21 12:56:54.091092771 +0200 +++ gcc/testsuite/gcc.target/i386/pr53366-1.c 2012-05-21 13:14:01.355210995 +0200 
@@ -0,0 +1,5 @@ +/* PR tree-optimization/53366 */ +/* { dg-do run { target avx_runtime } } */ +/* { dg-options "-O3 -mavx" } */ + +#include "../../gcc.dg/torture/pr53366-1.c" --- gcc/testsuite/gcc.target/i386/pr53366-2.c.jj 2012-05-21 12:56:56.868076994 +0200 +++ gcc/testsuite/gcc.target/i386/pr53366-2.c 2012-05-21 13:14:08.358172604 +0200 @@ -0,0 +1,5 @@ +/* PR tree-optimization/53366 */ +/* { dg-do run { target avx_runtime } } */ +/* { dg-options "-O3 -mavx" } */ + +#include "../../gcc.dg/torture/pr53366-2.c" Jakub