On Mon, May 04, 2015 at 10:11:13AM +0200, Richard Biener wrote: > Not sure how this helps when SRA tears apart the parameter. That is, > isn't the important thing that both the IPA modified function argument > types/decls have the same type as the types of the parameters SRA ends > up passing? (as far as alignment goes?) > > Yes, of course using "natural" alignment makes sure that the backend > can handle alignment properly and we don't run into oddball bugs here.
On IRC we were discussing making /* Return true if mode/type need doubleword alignment. */ static bool arm_needs_doubleword_align (machine_mode mode, const_tree type) { return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY - || (type && TYPE_ALIGN (type) > PARM_BOUNDARY)); + || (type && TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY)); } Looking at struct S { char a[16]; }; typedef struct S T; typedef struct S U __attribute__((aligned (16))); struct V { U u; T v; }; typedef int N __attribute__((aligned (16))); T t1; U u1; int a[3]; void f5 (__builtin_va_list *ap) { t1 = __builtin_va_arg (*ap, T); a[0] = __builtin_va_arg (*ap, int); u1 = __builtin_va_arg (*ap, U); a[1] = __builtin_va_arg (*ap, int); a[2] = __builtin_va_arg (*ap, N); } void f6 (int, N, int, U); void f7 (void) { U u = {}; f6 (0, (N) 1, 0, u); } and s/16/8/g output, it seems that neither i?86 nor x86_64 care about the alignment for any passing, ppc64le cares about aggregates, but not scalars apparently (with a warning that the passing changed), arm cares about both. And the f7 function shows that for non-aggregates, what arm does is simply never going to work, because there is no way to pass down the scalars aligned, f6 is still called with 1 in int type rather than N. So at least changing arm_needs_doubleword_align for non-aggregates would likely not break anything that hasn't been broken already and would unbreak the majority of cases. The following testcase shows that eipa_sra changes alignment even for the aggregates. Change aligned (8) to aligned (4) to see another possibility. /* PR target/65956 */ struct B { char *a, *b; }; typedef struct B C __attribute__((aligned (8))); struct A { C a; int b; long long c; }; char v[3]; __attribute__((noinline, noclone)) void fn1 (int v, ...) { __builtin_va_list ap; __builtin_va_start (ap, v); C c, d; c = __builtin_va_arg (ap, C); __builtin_va_arg (ap, int); d = __builtin_va_arg (ap, C); __builtin_va_end (ap); if (c.a != &v[1] || d.a != &v[2]) __builtin_abort (); v[1]++; } __attribute__((noinline, noclone)) int fn2 (C x) { asm volatile ("" : "+g" (x.a) : : "memory"); asm volatile ("" : "+g" (x.b) : : "memory"); return x.a == &v[0]; } __attribute__((noinline, noclone)) void fn3 (const char *x) { if (x[0] != 0) __builtin_abort (); } static struct A foo (const char *x, struct A y, struct A z) { struct A r = { { 0, 0 }, 0, 0 }; if (y.b && z.b) { if (fn2 (y.a) && fn2 (z.a)) switch (x[0]) { case '|': break; default: fn3 (x); } fn1 (0, y.a, 0, z.a); } return r; } __attribute__((noinline, noclone)) int bar (int x, struct A *y) { switch (x) { case 219: foo ("+", y[-2], y[0]); case 220: foo ("-", y[-2], y[0]); } } int main () { struct A a[3] = { { { &v[1], &v[0] }, 1, 1LL }, { { &v[0], &v[0] }, 0, 0LL }, { { &v[2], &v[0] }, 2, 2LL } }; bar (220, a + 2); if (v[1] != 1) __builtin_abort (); return 0; } Jakub