Hi Richard,
As discussed in private mail, this version of patch attempts to
increase alignment
of global struct decl if it contains an an array field(s) and array's
offset is a multiple of the alignment of vector type corresponding to
it's scalar type and recursively checks for nested structs.
eg:
static struct
{
  int a, b, c, d;
  int k[4];
  float f[10];
};
k is a candidate array since it's offset is 16 and alignment of
"vector (4) int" is 8.
Similarly for f.

I haven't been able to create a test-case where there are
multiple candidate arrays and vector alignment of arrays are different.
I suppose in this case we will have to increase alignment
of the struct by the max alignment ?
eg:
static struct
{
  <fields>
  T1 k[S1]
  <fields>
  T2 f[S2]
  <fields>
};

if V1 is vector type corresponding to T1, and V2 corresponding vector
type to T2,
offset (k) % align(V1) == 0 and offset (f) % align(V2) == 0
and align (V1) > align(V2) then we will increase alignment of struct
by align(V1).

Testing showed FAIL for g++.dg/torture/pr31863.C due to program timeout.
Initially it appeared to me, it went in infinite loop. However
on second thoughts I think it's probably not an infinite loop, rather
taking (extraordinarily) large amount of time
to compile the test-case with the patch.
The test-case  builds quickly for only 2 instantiations of ClassSpec
(ClassSpec<Key, A001, 1>,
 ClassSpec<Key, A002, 2>)
Building with 22 instantiations (upto ClassSpec<Key, A0023, 22>) takes up
to ~1m to compile.
with:
23  instantiations: ~2m
24 instantiations: ~5m
For 30 instantiations I terminated cc1plus after 13m (by SIGKILL).

I guess it shouldn't go in an infinite loop because:
a) structs cannot have circular references.
b) works for lower number of instantiations
However I have no sound evidence that it cannot be in infinite loop.
I don't understand why a decl node is getting visited more than once
for that test-case.

Using a hash_map to store alignments of decl's so that decl node gets visited
only once prevents the issue.
The patch is cross-tested on aarch64*-*-* and arm*-*-* and passes with
using hash_map
workaround.

Thanks,
Prathamesh
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 2b25b45..6f8c3b6 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -794,38 +794,112 @@ make_pass_slp_vectorize (gcc::context *ctxt)
      This should involve global alignment analysis and in the future also
      array padding.  */
 
+static unsigned get_vec_alignment_for_decl (tree);
+
+/* FIXME: decl_align_map is used to cache vec alignments of decl nodes,
+   so if a decl node is visited again, get_vec_alignment_for_decl
+   returns the alignment.
+   This is done to avoid looping infinitely or taking
+   very large time to compile for pr31863.C (and similar cases).
+   This happens because a decl node gets visited multiple times.
+   I am not sure why a decl node gets visited multiple times.  */
+static hash_map<tree, unsigned> *decl_align_map;
+
+static unsigned
+get_vec_alignment_for_array_decl (tree array_decl)
+{
+  tree type = TREE_TYPE (array_decl);
+  gcc_assert (TREE_CODE (type) == ARRAY_TYPE); 
+
+  tree vectype = get_vectype_for_scalar_type (strip_array_types (type));
+  return (vectype) ? TYPE_ALIGN (vectype) : 0; 
+}
+
+static unsigned
+get_vec_alignment_for_record_decl (tree record_decl)
+{
+  tree type = TREE_TYPE (record_decl);
+  gcc_assert (TREE_CODE (type) == RECORD_TYPE);
+  unsigned max_align = 0, alignment;
+  HOST_WIDE_INT offset; 
+
+  if (TYPE_PACKED (type))
+    return 0;
+
+  for (tree field = first_field (type); field != NULL_TREE; field = DECL_CHAIN 
(field)) 
+    {
+      /* C++FE puts node "._0" of code TYPE_DECL. skip that.  */
+      if (TREE_CODE (field) != FIELD_DECL)
+       continue;
+
+      offset = int_byte_position (field);
+      alignment = get_vec_alignment_for_decl (field);
+      if (alignment && (offset % (alignment / BITS_PER_UNIT) == 0) && 
(alignment > max_align))
+       max_align = alignment;
+    }
+  
+  return max_align; 
+}
+
+static unsigned
+get_vec_alignment_for_decl (tree decl)
+{
+  if (decl == NULL_TREE)
+    return 0;
+
+  gcc_assert (DECL_P (decl));
+
+  unsigned *slot = decl_align_map->get (decl);
+  if (slot)
+    return *slot;
+
+  static unsigned alignment = 0;
+  tree type = TREE_TYPE (decl);
+
+  switch (TREE_CODE (type))
+    {
+      case ARRAY_TYPE:
+       alignment = get_vec_alignment_for_array_decl (decl);
+       break;
+      case RECORD_TYPE:
+       alignment = get_vec_alignment_for_record_decl (decl);
+       break;
+      default:
+       alignment = 0;
+       break;
+    }
+
+  unsigned ret = (alignment > DECL_ALIGN (decl)) ? alignment : 0;
+  decl_align_map->put (decl, ret);
+  return ret;
+}
+
 static unsigned int
 increase_alignment (void)
 {
   varpool_node *vnode;
 
   vect_location = UNKNOWN_LOCATION;
-
+  decl_align_map = new hash_map<tree, unsigned>;
+  
   /* Increase the alignment of all global arrays for vectorization.  */
   FOR_EACH_DEFINED_VARIABLE (vnode)
     {
       tree vectype, decl = vnode->decl;
-      tree t;
       unsigned int alignment;
 
-      t = TREE_TYPE (decl);
-      if (TREE_CODE (t) != ARRAY_TYPE)
-        continue;
-      vectype = get_vectype_for_scalar_type (strip_array_types (t));
-      if (!vectype)
-        continue;
-      alignment = TYPE_ALIGN (vectype);
-      if (DECL_ALIGN (decl) >= alignment)
-        continue;
-
-      if (vect_can_force_dr_alignment_p (decl, alignment))
+      alignment = get_vec_alignment_for_decl (decl);
+
+      if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
         {
-         vnode->increase_alignment (TYPE_ALIGN (vectype));
+         vnode->increase_alignment (alignment);
           dump_printf (MSG_NOTE, "Increasing alignment of decl: ");
           dump_generic_expr (MSG_NOTE, TDF_SLIM, decl);
           dump_printf (MSG_NOTE, "\n");
         }
     }
+
+  delete decl_align_map;
   return 0;
 }
 

Attachment: ChangeLog
Description: Binary data

Reply via email to