On Wed, Dec 10, 2014 at 02:37:13AM +0300, Evgeny Stupachenko wrote: > 2014-12-10 Evgeny Stupachenko <evstu...@gmail.com>
I went ahead and filed a PR, so we have something to refer to in the ChangeLog and name the testcases. > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -47546,6 +47546,7 @@ expand_vec_perm_pblendv (struct expand_vec_perm_d *d) > dcopy.op0 = dcopy.op1 = d->op1; > else > dcopy.op0 = dcopy.op1 = d->op0; > + dcopy.target = gen_reg_rtx (vmode); > dcopy.one_operand_p = true; > > for (i = 0; i < nelt; ++i) This is incorrect if d->testing_p is true (can happen e.g. on the testcase below; generally, when testing_p is true, we should not use gen_reg_rtx because it can be called from GIMPLE optimizers before init_emit is called. See PR57896 for details. If d->testing_p is true, target is never the same as any of the operands, all 3 are virtual registers, so there is no overlap (and even if there would be, it should not matter). > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/blend.c > @@ -0,0 +1,63 @@ > +/* Test correctness of size 3 store groups permutation. */ > +/* { dg-do run } */ The testcase as is does not fail even with unfixed compiler. I had to add some dg-additional-options. > + bar(2, q); > + for (i = 0; i < N; i++) > + if (q[0].a[i].f != 0 || q[0].a[i].c != i || q[0].a[i].p != -1) > + return 1; Furthermore, tests in the testsuite should fail through abort () / __builtin_abort () instead of just returning non-zero exit code. So, here is complete updated patch I'm going to bootstrap/regtest. 2014-12-10 Jakub Jelinek <ja...@redhat.com> Evgeny Stupachenko <evstu...@gmail.com> * config/i386/i386.c (expand_vec_perm_pblendv): If not testing_p, set dcopy.target to a new pseudo. * gcc.dg/vect/pr64252.c: New test. * gcc.dg/pr64252.c: New test. * gcc.target/i386/avx2-pr64252.c: New test. --- gcc/config/i386/i386.c.jj 2014-12-10 09:45:15.000000000 +0100 +++ gcc/config/i386/i386.c 2014-12-10 11:38:12.530795610 +0100 @@ -47554,6 +47554,8 @@ expand_vec_perm_pblendv (struct expand_v dcopy.op0 = dcopy.op1 = d->op1; else dcopy.op0 = dcopy.op1 = d->op0; + if (!d->testing_p) + dcopy.target = gen_reg_rtx (vmode); dcopy.one_operand_p = true; for (i = 0; i < nelt; ++i) --- gcc/testsuite/gcc.dg/vect/pr64252.c.jj 2014-12-10 11:42:47.669991028 +0100 +++ gcc/testsuite/gcc.dg/vect/pr64252.c 2014-12-10 11:47:43.895818223 +0100 @@ -0,0 +1,66 @@ +/* PR target/64252 */ +/* Test correctness of size 3 store groups permutation. */ +/* { dg-do run } */ +/* { dg-additional-options "-O3" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#include "tree-vect.h" + +#define N 50 + +enum num3 +{ + a, b, c +}; + +struct flags +{ + enum num3 f; + unsigned int c; + unsigned int p; +}; + +struct flagsN +{ + struct flags a[N]; +}; + +void +bar (int n, struct flagsN *ff) +{ + struct flagsN *fc; + for (fc = ff + 1; fc < (ff + n); fc++) + { + int i; + for (i = 0; i < N; ++i) + { + ff->a[i].f = 0; + ff->a[i].c = i; + ff->a[i].p = -1; + } + for (i = 0; i < n; i++) + { + int j; + for (j = 0; j < N - n; ++j) + { + fc->a[i + j].f = 0; + fc->a[i + j].c = j + i; + fc->a[i + j].p = -1; + } + } + } +} + +struct flagsN q[2]; + +int main() +{ + int i; + check_vect (); + bar(2, q); + for (i = 0; i < N; i++) + if (q[0].a[i].f != 0 || q[0].a[i].c != i || q[0].a[i].p != -1) + abort (); + return 0; +} +/* { dg-final { cleanup-tree-dump "vect" } } */ --- gcc/testsuite/gcc.dg/pr64252.c.jj 2014-12-10 11:26:05.649467180 +0100 +++ gcc/testsuite/gcc.dg/pr64252.c 2014-12-10 11:25:27.057139759 +0100 @@ -0,0 +1,30 @@ +/* PR target/64252 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +typedef unsigned int V __attribute__((vector_size (32))); + +__attribute__((noinline, noclone)) void +foo (V *a, V *b, V *c, V *d, V *e) +{ + V t = __builtin_shuffle (*a, *b, *c); + V v = __builtin_shuffle (t, (V) { ~0U, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U }, (V) { 0, 1, 8, 3, 4, 5, 9, 7 }); + v = v + *d; + *e = v; +} + +int +main () +{ + V a, b, c, d, e; + int i; + a = (V) { 1, 2, 3, 4, 5, 6, 7, 8 }; + b = (V) { 9, 10, 11, 12, 13, 14, 15, 16 }; + c = (V) { 1, 3, 5, 7, 9, 11, 13, 15 }; + d = (V) { 0, 0, 0, 0, 0, 0, 0, 0 }; + foo (&a, &b, &c, &d, &e); + for (i = 0; i < 8; i++) + if (e[i] != ((i == 2 || i == 6) ? ~0U : 2 + 2 * i)) + __builtin_abort (); + return 0; +} --- gcc/testsuite/gcc.target/i386/avx2-pr64252.c.jj 2014-12-10 11:27:36.868877426 +0100 +++ gcc/testsuite/gcc.target/i386/avx2-pr64252.c 2014-12-10 11:30:49.500520279 +0100 @@ -0,0 +1,15 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx2" } */ +/* { dg-require-effective-target avx2 } */ + +#include "avx2-check.h" + +#define main() do_main () + +#include "../../gcc.dg/pr64252.c" + +static void +avx2_test (void) +{ + do_main (); +} Jakub