This fixes PR54894. Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.
Richard.

2012-10-12  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/54894
        * tree-vect-stmts.c (get_vectype_for_scalar_type_and_size):
        Handle over-aligned scalar types properly.

        * gcc.dg/torture/pr54894.c: New testcase.

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c (revision 192359)
+++ gcc/tree-vect-stmts.c (working copy)
@@ -6060,11 +6060,6 @@ get_vectype_for_scalar_type_and_size (tr
       && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
     return NULL_TREE;
 
-  /* We can't build a vector type of elements with alignment bigger than
-     their size. */
-  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
-    return NULL_TREE;
-
   /* For vector types of elements whose mode precision doesn't
      match their types precision we use a element type of mode
      precision. The vectorization routines will have to make sure
@@ -6086,6 +6081,11 @@ get_vectype_for_scalar_type_and_size (tr
       && !POINTER_TYPE_P (scalar_type))
     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
 
+  /* We can't build a vector type of elements with alignment bigger than
+     their size. */
+  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
+    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
+
   /* If no size was supplied use the mode the target prefers. Otherwise
      lookup a vector mode of the specified size. */
   if (size == 0)
Index: gcc/testsuite/gcc.dg/torture/pr54894.c
===================================================================
--- gcc/testsuite/gcc.dg/torture/pr54894.c (revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr54894.c (working copy)
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+
+typedef unsigned long long uint64_t;
+
+#define n 4096
+double A[n][n] __attribute__((aligned(16)));
+double B[n][n] __attribute__((aligned(16)));
+double C[n][n] __attribute__((aligned(16)));
+
+#define tilesize 128
+
+typedef double adouble __attribute__((__aligned__(16)));
+
+void foo ()
+{
+  int ih, jh, kh, il, kl, jl;
+  for (ih = 0; ih < n; ih += tilesize)
+    for (jh = 0; jh < n; jh += tilesize)
+      for (kh = 0; kh < n; kh += tilesize)
+        for (il = 0; il < tilesize; ++il)
+          {
+            adouble *Ap = (adouble *)&A[ih+il][kh];
+            for (kl = 0; kl < tilesize; ++kl)
+              for (jl = 0; jl < tilesize; ++jl)
+                C[ih+il][jh+jl] += Ap[kl] * B[kh+kl][jh+jl];
+          }
+}