On Wed, Dec 10, 2014 at 12:03 PM, Jakub Jelinek <ja...@redhat.com> wrote: > On Wed, Dec 10, 2014 at 02:37:13AM +0300, Evgeny Stupachenko wrote: >> 2014-12-10 Evgeny Stupachenko <evstu...@gmail.com> > > I went ahead and filed a PR, so we have something to refer to in the > ChangeLog and name the testcases.
Thanks! >> --- a/gcc/config/i386/i386.c >> +++ b/gcc/config/i386/i386.c >> @@ -47546,6 +47546,7 @@ expand_vec_perm_pblendv (struct expand_vec_perm_d *d) >> dcopy.op0 = dcopy.op1 = d->op1; >> else >> dcopy.op0 = dcopy.op1 = d->op0; >> + dcopy.target = gen_reg_rtx (vmode); >> dcopy.one_operand_p = true; >> >> for (i = 0; i < nelt; ++i) > > This is incorrect if d->testing_p is true (can happen e.g. on the testcase > below; generally, when testing_p is true, we should not use gen_reg_rtx > because it can be called from GIMPLE optimizers before init_emit is called. > See PR57896 for details. Yes, this was also my concern. We've had some nasty failures occurring when vReg was generated after reload. > If d->testing_p is true, target is never the same as any of the operands, > all 3 are virtual registers, so there is no overlap (and even if there would > be, it should not matter). > >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/vect/blend.c >> @@ -0,0 +1,63 @@ >> +/* Test correctness of size 3 store groups permutation. */ >> +/* { dg-do run } */ > > The testcase as is does not fail even with unfixed compiler. I had to add > some dg-additional-options. > >> + bar(2, q); >> + for (i = 0; i < N; i++) >> + if (q[0].a[i].f != 0 || q[0].a[i].c != i || q[0].a[i].p != -1) >> + return 1; > > Furthermore, tests in the testsuite should fail through abort () > / __builtin_abort () instead of just returning non-zero exit code. > > So, here is complete updated patch I'm going to bootstrap/regtest. > > 2014-12-10 Jakub Jelinek <ja...@redhat.com> > Evgeny Stupachenko <evstu...@gmail.com> > > * config/i386/i386.c (expand_vec_perm_pblendv): If not testing_p, > set dcopy.target to a new pseudo. > > * gcc.dg/vect/pr64252.c: New test. > * gcc.dg/pr64252.c: New test. > * gcc.target/i386/avx2-pr64252.c: New test. OK. Thanks, Uros. > > --- gcc/config/i386/i386.c.jj 2014-12-10 09:45:15.000000000 +0100 > +++ gcc/config/i386/i386.c 2014-12-10 11:38:12.530795610 +0100 > @@ -47554,6 +47554,8 @@ expand_vec_perm_pblendv (struct expand_v > dcopy.op0 = dcopy.op1 = d->op1; > else > dcopy.op0 = dcopy.op1 = d->op0; > + if (!d->testing_p) > + dcopy.target = gen_reg_rtx (vmode); > dcopy.one_operand_p = true; > > for (i = 0; i < nelt; ++i) > --- gcc/testsuite/gcc.dg/vect/pr64252.c.jj 2014-12-10 11:42:47.669991028 > +0100 > +++ gcc/testsuite/gcc.dg/vect/pr64252.c 2014-12-10 11:47:43.895818223 +0100 > @@ -0,0 +1,66 @@ > +/* PR target/64252 */ > +/* Test correctness of size 3 store groups permutation. */ > +/* { dg-do run } */ > +/* { dg-additional-options "-O3" } */ > +/* { dg-additional-options "-mavx" { target avx_runtime } } */ > + > +#include "tree-vect.h" > + > +#define N 50 > + > +enum num3 > +{ > + a, b, c > +}; > + > +struct flags > +{ > + enum num3 f; > + unsigned int c; > + unsigned int p; > +}; > + > +struct flagsN > +{ > + struct flags a[N]; > +}; > + > +void > +bar (int n, struct flagsN *ff) > +{ > + struct flagsN *fc; > + for (fc = ff + 1; fc < (ff + n); fc++) > + { > + int i; > + for (i = 0; i < N; ++i) > + { > + ff->a[i].f = 0; > + ff->a[i].c = i; > + ff->a[i].p = -1; > + } > + for (i = 0; i < n; i++) > + { > + int j; > + for (j = 0; j < N - n; ++j) > + { > + fc->a[i + j].f = 0; > + fc->a[i + j].c = j + i; > + fc->a[i + j].p = -1; > + } > + } > + } > +} > + > +struct flagsN q[2]; > + > +int main() > +{ > + int i; > + check_vect (); > + bar(2, q); > + for (i = 0; i < N; i++) > + if (q[0].a[i].f != 0 || q[0].a[i].c != i || q[0].a[i].p != -1) > + abort (); > + return 0; > +} > +/* { dg-final { cleanup-tree-dump "vect" } } */ > --- gcc/testsuite/gcc.dg/pr64252.c.jj 2014-12-10 11:26:05.649467180 +0100 > +++ gcc/testsuite/gcc.dg/pr64252.c 2014-12-10 11:25:27.057139759 +0100 > @@ -0,0 +1,30 @@ > +/* PR target/64252 */ > +/* { dg-do run } */ > +/* { dg-options "-O2" } */ > + > +typedef unsigned int V __attribute__((vector_size (32))); > + > +__attribute__((noinline, noclone)) void > +foo (V *a, V *b, V *c, V *d, V *e) > +{ > + V t = __builtin_shuffle (*a, *b, *c); > + V v = __builtin_shuffle (t, (V) { ~0U, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U > }, (V) { 0, 1, 8, 3, 4, 5, 9, 7 }); > + v = v + *d; > + *e = v; > +} > + > +int > +main () > +{ > + V a, b, c, d, e; > + int i; > + a = (V) { 1, 2, 3, 4, 5, 6, 7, 8 }; > + b = (V) { 9, 10, 11, 12, 13, 14, 15, 16 }; > + c = (V) { 1, 3, 5, 7, 9, 11, 13, 15 }; > + d = (V) { 0, 0, 0, 0, 0, 0, 0, 0 }; > + foo (&a, &b, &c, &d, &e); > + for (i = 0; i < 8; i++) > + if (e[i] != ((i == 2 || i == 6) ? ~0U : 2 + 2 * i)) > + __builtin_abort (); > + return 0; > +} > --- gcc/testsuite/gcc.target/i386/avx2-pr64252.c.jj 2014-12-10 > 11:27:36.868877426 +0100 > +++ gcc/testsuite/gcc.target/i386/avx2-pr64252.c 2014-12-10 > 11:30:49.500520279 +0100 > @@ -0,0 +1,15 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mavx2" } */ > +/* { dg-require-effective-target avx2 } */ > + > +#include "avx2-check.h" > + > +#define main() do_main () > + > +#include "../../gcc.dg/pr64252.c" > + > +static void > +avx2_test (void) > +{ > + do_main (); > +} > > > Jakub