On Mon, Oct 31, 2011 at 03:23:32PM +0100, Jakub Jelinek wrote: > Would be nice to cut down slightly this testcase into just one or two loops > that are vectorized and turn it into a runtime testcase which verifies > the vectorization was correct.
Here is one such testcase (though in your case there are no loads for the indexes; on the other hand, you have 3 of the IVs each multiplied by some loop invariant and all added together, and there are far more expressions in your case). 2011-10-31 Jakub Jelinek <ja...@redhat.com> * gcc.target/i386/avx2-gather-4.c: New test. --- gcc/testsuite/gcc.target/i386/avx2-gather-4.c.jj 2011-10-31 15:58:57.000000000 +0100 +++ gcc/testsuite/gcc.target/i386/avx2-gather-4.c 2011-10-31 15:59:44.000000000 +0100 @@ -0,0 +1,38 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx2 } */ +/* { dg-options "-O3 -mavx2" } */ + +#include "avx2-check.h" + +#define N 1024 +int a[N], b[N], c[N], d[N]; + +__attribute__((noinline, noclone)) void +foo (float *__restrict p, float *__restrict q, float *__restrict r, + long s1, long s2, long s3) +{ + int i; + for (i = 0; i < N; i++) + p[i] = q[a[i] * s1 + b[i] * s2 + s3] * r[c[i] * s1 + d[i] * s2 + s3]; +} + +static void +avx2_test (void) +{ + int i; + float e[N], f[N], g[N]; + for (i = 0; i < N; i++) + { + a[i] = (i * 7) & (N / 8 - 1); + b[i] = (i * 13) & (N / 8 - 1); + c[i] = (i * 23) & (N / 8 - 1); + d[i] = (i * 5) & (N / 8 - 1); + e[i] = 16.5 + i; + f[i] = 127.5 - i; + } + foo (g, e, f, 3, 2, 4); + for (i = 0; i < N; i++) + if (g[i] != (float) ((20.5 + a[i] * 3 + b[i] * 2) + * (123.5 - c[i] * 3 - d[i] * 2))) + abort (); +} Jakub