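This fixes PR54894.  Instead of refusing to build a vector type when the
scalar element type is over-aligned, get_vectype_for_scalar_type_and_size
now substitutes the type the language hook provides for the element mode.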

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2012-10-12  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/54894
        * tree-vect-stmts.c (get_vectype_for_scalar_type_and_size):
        Handle over-aligned scalar types properly.

        * gcc.dg/torture/pr54894.c: New testcase.

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c       (revision 192359)
+++ gcc/tree-vect-stmts.c       (working copy)
@@ -6060,11 +6060,6 @@ get_vectype_for_scalar_type_and_size (tr
       && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
     return NULL_TREE;
 
-  /* We can't build a vector type of elements with alignment bigger than
-     their size.  */
-  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
-    return NULL_TREE;
-
   /* For vector types of elements whose mode precision doesn't
      match their types precision we use a element type of mode
      precision.  The vectorization routines will have to make sure
@@ -6086,6 +6081,11 @@ get_vectype_for_scalar_type_and_size (tr
       && !POINTER_TYPE_P (scalar_type))
     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
 
+  /* We can't build a vector type of elements with alignment bigger than
+     their size, so use the language's type for the element mode instead.  */
+  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
+    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
+
   /* If no size was supplied use the mode the target prefers.   Otherwise
      lookup a vector mode of the specified size.  */
   if (size == 0)
Index: gcc/testsuite/gcc.dg/torture/pr54894.c
===================================================================
--- gcc/testsuite/gcc.dg/torture/pr54894.c      (revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr54894.c      (working copy)
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+
+typedef unsigned long long uint64_t;
+
+#define n 4096
+double A[n][n] __attribute__((aligned(16)));
+double B[n][n] __attribute__((aligned(16)));
+double C[n][n] __attribute__((aligned(16)));
+
+#define tilesize 128
+
+typedef double adouble __attribute__((__aligned__(16)));
+
+void foo ()
+{
+  int ih, jh, kh, il, kl, jl;
+  for (ih = 0; ih < n; ih += tilesize) 
+    for (jh = 0; jh < n; jh += tilesize)                 
+      for (kh = 0; kh < n; kh += tilesize)                      
+       for (il = 0; il < tilesize; ++il)
+         {
+           adouble *Ap = (adouble *)&A[ih+il][kh];
+           for (kl = 0; kl < tilesize; ++kl)
+             for (jl = 0; jl < tilesize; ++jl)
+               C[ih+il][jh+jl] += Ap[kl] * B[kh+kl][jh+jl];
+         }
+}

Reply via email to