On 18 May 2016 at 19:38, Richard Biener <[email protected]> wrote: > On Wed, 18 May 2016, Prathamesh Kulkarni wrote: > >> On 17 May 2016 at 18:36, Richard Biener <[email protected]> wrote: >> > On Wed, 11 May 2016, Prathamesh Kulkarni wrote: >> > >> >> On 6 May 2016 at 17:20, Richard Biener <[email protected]> wrote: >> >> > >> >> > You can't simply use >> >> > >> >> > + offset = int_byte_position (field); >> >> > >> >> > as it can be placed at variable offset which will make int_byte_position >> >> > ICE. Note it also returns a truncated byte position (bit position >> >> > stripped) which may be undesirable here. I think you want to use >> >> > bit_position and if and only if DECL_FIELD_OFFSET and >> >> > DECL_FIELD_BIT_OFFSET are INTEGER_CST. >> >> oops, I didn't realize offsets could be variable. >> >> Will that be the case only for VLA member inside struct ? >> > >> > And non-VLA members after such member. >> > >> >> > Your observation about the expensiveness of the walk still stands I >> >> > guess >> >> > and eventually you should at least cache the >> >> > get_vec_alignment_for_record_decl cases. Please make those workers >> >> > _type rather than _decl helpers. >> >> Done >> >> > >> >> > You seem to simply get at the maximum vectorized field/array element >> >> > alignment possible for all arrays - you could restrict that to >> >> > arrays with at least vector size (as followup). >> >> Um sorry, I didn't understand this part. >> > >> > It doesn't make sense to align >> > >> > struct { int a; int b; int c; int d; float b[3]; int e; }; >> > >> > because we have a float[3] member. There is no vector size that >> > would cover the float[3] array. >> Thanks for the explanation. >> So we do not want to align struct if sizeof (array_field) < sizeof >> (vector_type). >> This issue is also present without patch for global arrays, so I modified >> get_vec_alignment_for_array_type, to return 0 if sizeof (array_type) < >> sizeof (vectype). 
>> > >> >> > >> >> > + /* Skip artificial decls like typeinfo decls or if >> >> > + record is packed. */ >> >> > + if (DECL_ARTIFICIAL (record_decl) || TYPE_PACKED (type)) >> >> > + return 0; >> >> > >> >> > I think we should honor DECL_USER_ALIGN as well and not mess with those >> >> > decls. >> >> Done >> >> > >> >> > Given the patch now does quite some extra work it might make sense >> >> > to split the symtab part out of the vect_can_force_dr_alignment_p >> >> > predicate and call that early. >> >> In the patch I call symtab_node::can_increase_alignment_p early. I tried >> >> moving that to it's callers - vect_compute_data_ref_alignment and >> >> increase_alignment::execute, however that failed some tests in vect, and >> >> hence I didn't add the following hunk in the patch. Did I miss some >> >> check ? >> > >> > Not sure. >> > >> >> diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c >> >> index 7652e21..2c1acee 100644 >> >> --- a/gcc/tree-vect-data-refs.c >> >> +++ b/gcc/tree-vect-data-refs.c >> >> @@ -795,7 +795,10 @@ vect_compute_data_ref_alignment (struct >> >> data_reference *dr) >> >> && TREE_CODE (TREE_OPERAND (base, 0)) == ADDR_EXPR) >> >> base = TREE_OPERAND (TREE_OPERAND (base, 0), 0); >> >> >> >> - if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))) >> >> + if (!(TREE_CODE (base) == VAR_DECL >> >> + && decl_in_symtab_p (base) >> >> + && symtab_node::get (base)->can_increase_alignment_p () >> >> + && vect_can_force_dr_alignment_p (base, TYPE_ALIGN >> >> (vectype)))) >> >> { >> >> if (dump_enabled_p ()) >> >> { >> > >> > + for (tree field = first_field (type); >> > + field != NULL_TREE; >> > + field = DECL_CHAIN (field)) >> > + { >> > + /* Skip if not FIELD_DECL or has variable offset. 
*/ >> > + if (TREE_CODE (field) != FIELD_DECL >> > + || TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST >> > + || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST >> > + || DECL_USER_ALIGN (field) >> > + || DECL_ARTIFICIAL (field)) >> > + continue; >> > >> > you can stop processing the type and return 0 here if the offset >> > is not an INTEGER_CST. All following fields will have the same issue. >> > >> > + /* FIXME: is this check necessary since we check for variable >> > offset above ? */ >> > + if (TREE_CODE (offset_tree) != INTEGER_CST) >> > + continue; >> > >> > the check should not be necessary. >> > >> > offset = tree_to_uhwi (offset_tree); >> > >> > but in theory offset_tree might not fit a unsigned HOST_WIDE_INT, so >> > instead of for INTEGER_CST please check ! tree_fits_uhwi_p (offset_tree). >> > As above all following fields will also fail the check if this fails so >> > you can return 0 early. >> > >> > +static unsigned >> > +get_vec_alignment_for_type (tree type) >> > +{ >> > + if (type == NULL_TREE) >> > + return 0; >> > + >> > + gcc_assert (TYPE_P (type)); >> > + >> > + unsigned *slot = type_align_map->get (type); >> > + if (slot) >> > + return *slot; >> > >> > I suggest to apply the caching only for the RECORD_TYPE case to keep >> > the size of the map low. >> > >> > Otherwise the patch looks ok now. >> I have done the suggested changes in attached patch. >> Is this version OK to commit after bootstrap+test ? >> The patch survives cross-testing on arm*-*-* and aarch64*-*-*. > > + tree type_size_tree = TYPE_SIZE (type); > + if (!type_size_tree) > + return TYPE_ALIGN (vectype); > + > + if (TREE_CODE (type_size_tree) != INTEGER_CST) > + return TYPE_ALIGN (vectype); > + > + /* If array has constant size, ensure that it's at least equal to > + size of vector type. 
*/ > + if (!tree_fits_uhwi_p (type_size_tree)) > + return TYPE_ALIGN (vectype); > + unsigned HOST_WIDE_INT type_size = tree_to_uhwi (type_size_tree); > + > + tree vectype_size_tree = TYPE_SIZE (vectype); > + if (!tree_fits_uhwi_p (vectype_size_tree)) > + return TYPE_ALIGN (vectype); > + > + unsigned HOST_WIDE_INT vectype_size = tree_to_uhwi (vectype_size_tree); > + return (type_size > vectype_size) ? TYPE_ALIGN (vectype) : 0; > > please change this to > > if (! TYPE_SIZE (type) > || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST > || tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (vectype))) > return 0; > > return TYPE_ALIGN (vectype); > > consistent with the handling for "VLA" records. > > + unsigned *slot = type_align_map->get (type); > + if (slot) > + return *slot; > + > + unsigned max_align = 0, alignment; > + HOST_WIDE_INT offset; > + tree offset_tree; > + > + if (TYPE_PACKED (type)) > + return 0; > + > > move the tye_align_map query after the TYPE_PACKED check. > > + /* We don't need to process the type further if offset is variable, > + since the offsets of remaining members will also be variable. */ > + if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST > + || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST) > + return 0; > > just break; here (and in the other case returning zero). If a > previous record field said we'd want to align we still should align. > Also we want to store 0 into the type_align_map as well. > > Ok with those changes. Hi, Is this version OK ? Cross tested on arm*-*-* and aarch64*-*-*
Thanks, Prathamesh > > Thanks, > Richard.
diff --git a/gcc/testsuite/gcc.dg/vect/section-anchors-vect-70.c
b/gcc/testsuite/gcc.dg/vect/section-anchors-vect-70.c
new file mode 100644
index 0000000..7010a52
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/section-anchors-vect-70.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target section_anchors } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 32
+
+/* Increase alignment of struct if an array's offset is multiple of alignment
of
+ vector type corresponding to its scalar type.
+ For the below test-case:
+ offsetof(e) == 8 bytes.
+ i) For arm: let x = alignment of vector type corresponding to int,
+ x == 8 bytes.
+ Since offsetof(e) % x == 0, set DECL_ALIGN(a, b, c) to x.
+ ii) For aarch64, ppc: x == 16 bytes.
+ Since offsetof(e) % x != 0, don't increase alignment of a, b, c.
+*/
+
+static struct A {
+ int p1, p2;
+ int e[N];
+} a, b, c;
+
+int foo(void)
+{
+ for (int i = 0; i < N; i++)
+ a.e[i] = b.e[i] + c.e[i];
+
+ return a.e[0];
+}
+
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0
"increase_alignment" { target aarch64*-*-* } } } */
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0
"increase_alignment" { target powerpc64*-*-* } } } */
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 3
"increase_alignment" { target arm*-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/section-anchors-vect-71.c
b/gcc/testsuite/gcc.dg/vect/section-anchors-vect-71.c
new file mode 100644
index 0000000..7cbd1dc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/section-anchors-vect-71.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target section_anchors } */
+/* { dg-require-effective-target vect_int } */
+
+/* Should not increase alignment of the struct because
+ sizeof (A.e) < sizeof(corresponding vector type). */
+
+#define N 3
+
+static struct A {
+ int p1, p2;
+ int e[N];
+} a, b, c;
+
+int foo(void)
+{
+ for (int i = 0; i < N; i++)
+ a.e[i] = b.e[i] + c.e[i];
+
+ return a.e[0];
+}
+
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0
"increase_alignment" { target aarch64*-*-* } } } */
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0
"increase_alignment" { target powerpc64*-*-* } } } */
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0
"increase_alignment" { target arm*-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/section-anchors-vect-72.c
b/gcc/testsuite/gcc.dg/vect/section-anchors-vect-72.c
new file mode 100644
index 0000000..873fabe
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/section-anchors-vect-72.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target section_anchors } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 32
+
+/* Clone of section-anchors-vect-70.c having nested struct. */
+
+struct S
+{
+ int e[N];
+};
+
+static struct A {
+ int p1, p2;
+ struct S s;
+} a, b, c;
+
+int foo(void)
+{
+ for (int i = 0; i < N; i++)
+ a.s.e[i] = b.s.e[i] + c.s.e[i];
+
+ return a.s.e[0];
+}
+
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0
"increase_alignment" { target aarch64*-*-* } } } */
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0
"increase_alignment" { target powerpc64*-*-* } } } */
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 3
"increase_alignment" { target arm*-*-* } } } */
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 2b25b45..bd04cd7 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -794,38 +793,142 @@ make_pass_slp_vectorize (gcc::context *ctxt)
This should involve global alignment analysis and in the future also
array padding. */
+static unsigned get_vec_alignment_for_type (tree);
+static hash_map<tree, unsigned> *type_align_map;
+
+/* Return alignment of array's vector type corresponding to scalar type.
+ 0 if no vector type exists. */
+static unsigned
+get_vec_alignment_for_array_type (tree type)
+{
+ gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
+
+ tree vectype = get_vectype_for_scalar_type (strip_array_types (type));
+ if (!vectype
+ || !TYPE_SIZE (type)
+ || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
+ || tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (vectype)))
+ return 0;
+
+ return TYPE_ALIGN (vectype);
+}
+
+/* Return alignment of field having maximum alignment of vector type
+ corresponding to its scalar type. For now, we only consider fields whose
+ offset is a multiple of its vector alignment.
+ 0 if no suitable field is found. */
+static unsigned
+get_vec_alignment_for_record_type (tree type)
+{
+ gcc_assert (TREE_CODE (type) == RECORD_TYPE);
+
+ unsigned max_align = 0, alignment;
+ HOST_WIDE_INT offset;
+ tree offset_tree;
+
+ if (TYPE_PACKED (type))
+ return 0;
+
+ unsigned *slot = type_align_map->get (type);
+ if (slot)
+ return *slot;
+
+ for (tree field = first_field (type);
+ field != NULL_TREE;
+ field = DECL_CHAIN (field))
+ {
+ /* Skip if not FIELD_DECL or if alignment is set by user. */
+ if (TREE_CODE (field) != FIELD_DECL
+ || DECL_USER_ALIGN (field)
+ || DECL_ARTIFICIAL (field))
+ continue;
+
+ /* We don't need to process the type further if offset is variable,
+ since the offsets of remaining members will also be variable. */
+ if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
+ || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
+ break;
+
+ /* Similarly stop processing the type if offset_tree
+ does not fit in unsigned HOST_WIDE_INT. */
+ offset_tree = bit_position (field);
+ if (!tree_fits_uhwi_p (offset_tree))
+ break;
+
+ offset = tree_to_uhwi (offset_tree);
+ alignment = get_vec_alignment_for_type (TREE_TYPE (field));
+
+ /* Get maximum alignment of vectorized field/array among those members
+ whose offset is multiple of the vector alignment. */
+ if (alignment
+ && (offset % alignment == 0)
+ && (alignment > max_align))
+ max_align = alignment;
+ }
+
+ type_align_map->put (type, max_align);
+ return max_align;
+}
+
+/* Return alignment of vector type corresponding to the type's scalar type,
+ or 0 if it doesn't exist or the vector alignment is less than
+ the type's alignment. */
+static unsigned
+get_vec_alignment_for_type (tree type)
+{
+ if (type == NULL_TREE)
+ return 0;
+
+ gcc_assert (TYPE_P (type));
+
+ unsigned alignment = 0;
+ switch (TREE_CODE (type))
+ {
+ case ARRAY_TYPE:
+ alignment = get_vec_alignment_for_array_type (type);
+ break;
+ case RECORD_TYPE:
+ alignment = get_vec_alignment_for_record_type (type);
+ break;
+ default:
+ alignment = 0;
+ break;
+ }
+
+ return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
+}
+
+/* Entry point to increase_alignment pass. */
static unsigned int
increase_alignment (void)
{
varpool_node *vnode;
vect_location = UNKNOWN_LOCATION;
+ type_align_map = new hash_map<tree, unsigned>;
/* Increase the alignment of all global arrays for vectorization. */
FOR_EACH_DEFINED_VARIABLE (vnode)
{
- tree vectype, decl = vnode->decl;
- tree t;
+ tree decl = vnode->decl;
unsigned int alignment;
- t = TREE_TYPE (decl);
- if (TREE_CODE (t) != ARRAY_TYPE)
- continue;
- vectype = get_vectype_for_scalar_type (strip_array_types (t));
- if (!vectype)
- continue;
- alignment = TYPE_ALIGN (vectype);
- if (DECL_ALIGN (decl) >= alignment)
- continue;
-
- if (vect_can_force_dr_alignment_p (decl, alignment))
+ if ((decl_in_symtab_p (decl)
+ && !symtab_node::get (decl)->can_increase_alignment_p ())
+ || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
+ continue;
+
+ alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
+ if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
{
- vnode->increase_alignment (TYPE_ALIGN (vectype));
+ vnode->increase_alignment (alignment);
dump_printf (MSG_NOTE, "Increasing alignment of decl: ");
dump_generic_expr (MSG_NOTE, TDF_SLIM, decl);
dump_printf (MSG_NOTE, "\n");
}
}
+
+ delete type_align_map;
return 0;
}
ChangeLog
Description: Binary data
