On Thu, Nov 29, 2012 at 11:06:41AM +0100, Martin Jambor wrote: > Hi, > > thanks for the review. When writing a reply I realized I indeed made > a mistake or two in the part concerning prev_base and the code was not > what it intended to be. I'll re-write it today.
OK, this is it. The part in tree-sra.c, in which I make IPA-SRA punt when the alignment of the loads in the callee are not as big as alignment of the required types of the new arguments, remained the same. In ipa-prop.h, I now use maximum of the alignment from get_pointer_alignment_1 and the type alignment propagated from the callee. I now also recognize cases where the dereference is buried below a COMPONENT_REF or ARRAY_REF (this is checked by new testcase ipa-sra-9.c which fails at least on on sparc64 when things go wrong). The patch has passed bootstrap and testing on x86_64-linux, ppc64-linux and sparc64-linux. Thanks, Martin 2012-11-29 Martin Jambor <mjam...@suse.cz> PR middle-end/52890 PR tree-optimization/55415 PR tree-optimization/54386 PR target/55448 * ipa-prop.c (ipa_modify_call_arguments): Be optimistic when get_pointer_alignment_1 returns false and the base was not a dereference. * tree-sra.c (access_precludes_ipa_sra_p): New parameter req_align, added check for required alignment. Update the user. * testsuite/gcc.dg/ipa/ipa-sra-7.c: New test. * testsuite/gcc.dg/ipa/ipa-sra-8.c: Likewise. * testsuite/gcc.dg/ipa/ipa-sra-9.c: Likewise. * testsuite/gcc.target/i386/pr55448.c: Likewise. *** /tmp/cY007b_ipa-prop.c Fri Nov 30 12:26:26 2012 --- gcc/ipa-prop.c Thu Nov 29 16:05:04 2012 *************** ipa_modify_call_arguments (struct cgraph *** 2888,2893 **** --- 2888,2895 ---- { tree expr, base, off; location_t loc; + unsigned int deref_align; + bool deref_base = false; /* We create a new parameter out of the value of the old one, we can do the following kind of transformations: *************** ipa_modify_call_arguments (struct cgraph *** 2920,2928 **** { HOST_WIDE_INT base_offset; tree prev_base; if (TREE_CODE (base) == ADDR_EXPR) ! base = TREE_OPERAND (base, 0); prev_base = base; base = get_addr_base_and_unit_offset (base, &base_offset); /* Aggregate arguments can have non-invariant addresses. */ --- 2922,2936 ---- { HOST_WIDE_INT base_offset; tree prev_base; + bool addrof; if (TREE_CODE (base) == ADDR_EXPR) ! { ! base = TREE_OPERAND (base, 0); ! addrof = true; ! } ! else ! addrof = false; prev_base = base; base = get_addr_base_and_unit_offset (base, &base_offset); /* Aggregate arguments can have non-invariant addresses. */ *************** ipa_modify_call_arguments (struct cgraph *** 2934,2939 **** --- 2942,2952 ---- } else if (TREE_CODE (base) == MEM_REF) { + if (!addrof) + { + deref_base = true; + deref_align = TYPE_ALIGN (TREE_TYPE (base)); + } off = build_int_cst (adj->alias_ptr_type, base_offset + adj->offset / BITS_PER_UNIT); *************** ipa_modify_call_arguments (struct cgraph *** 2956,2962 **** unsigned int align; unsigned HOST_WIDE_INT misalign; ! get_pointer_alignment_1 (base, &align, &misalign); misalign += (tree_to_double_int (off) .sext (TYPE_PRECISION (TREE_TYPE (off))).low * BITS_PER_UNIT); --- 2969,2985 ---- unsigned int align; unsigned HOST_WIDE_INT misalign; ! if (deref_base) ! { ! align = deref_align; ! misalign = 0; ! } ! else ! { ! get_pointer_alignment_1 (base, &align, &misalign); ! if (TYPE_ALIGN (type) > align) ! align = TYPE_ALIGN (type); ! } misalign += (tree_to_double_int (off) .sext (TYPE_PRECISION (TREE_TYPE (off))).low * BITS_PER_UNIT); *** /dev/null Thu Oct 25 13:49:30 2012 --- gcc/testsuite/gcc.dg/ipa/ipa-sra-7.c Wed Nov 28 14:19:30 2012 *************** *** 0 **** --- 1,42 ---- + /* { dg-do run } */ + /* { dg-options "-O3" } */ + + typedef unsigned int myint __attribute__((aligned(1))); + + typedef struct __attribute__((packed)) S { + unsigned a, b, c; + } SS; + + typedef SS __attribute__((aligned(1))) SSS; + + + static unsigned int __attribute__ ((noinline)) + get_a (SSS *p) + { + return p->a; + }; + + static int __attribute__ ((noinline, noclone)) + foo (SS *p) + { + int r = (int) get_a(p) + 2; + return r; + } + + char buf[512]; + + static SSS * __attribute__ ((noinline, noclone)) + get_sss (void) + { + return (SSS *)(buf + 1); + } + + + int + main(int argc, char *argv[]) + { + SSS *p = get_sss(); + if (foo(p) != 2) + __builtin_abort (); + return 0; + } *** /dev/null Thu Oct 25 13:49:30 2012 --- gcc/testsuite/gcc.dg/ipa/ipa-sra-8.c Fri Nov 30 00:52:03 2012 *************** *** 0 **** --- 1,41 ---- + /* { dg-do run } */ + /* { dg-options "-O3" } */ + + typedef unsigned int myint __attribute__((aligned(1))); + + typedef struct S { + unsigned a, b, c; + } SS; + + typedef SS __attribute__((aligned(1))) SSS; + + + static unsigned int __attribute__ ((noinline)) + get_a (SS s) + { + return s.a; + }; + + static int __attribute__ ((noinline, noclone)) + foo (SSS *p) + { + int r = (int) get_a(*p) + 2; + return r; + } + + char buf[512]; + + static SSS * __attribute__ ((noinline, noclone)) + get_sss (void) + { + return (SSS *)(buf + 1); + } + + int + main(int argc, char *argv[]) + { + SSS *p = get_sss(); + if (foo(p) != 2) + __builtin_abort (); + return 0; + } *** /dev/null Thu Oct 25 13:49:30 2012 --- gcc/testsuite/gcc.dg/ipa/ipa-sra-9.c Fri Nov 30 00:52:12 2012 *************** *** 0 **** --- 1,44 ---- + /* { dg-do run } */ + /* { dg-options "-O3" } */ + + typedef unsigned int myint __attribute__((aligned(1))); + + typedef struct S { + unsigned a, b, c; + } SS; + + typedef struct U { + SS s[2]; + } UU; + + typedef UU __attribute__((aligned(1))) UUU; + + static unsigned int __attribute__ ((noinline)) + get_a (SS s) + { + return s.a; + }; + + static int __attribute__ ((noinline, noclone)) + foo (UUU *p) + { + int r = (int) get_a(p->s[0]) + 2; + return r; + } + + char buf[512]; + + static UUU * __attribute__ ((noinline, noclone)) + get_uuu (void) + { + return (UUU *)(buf + 1); + } + + int + main(int argc, char *argv[]) + { + UUU *p = get_uuu(); + if (foo(p) != 2) + __builtin_abort (); + return 0; + } *** /dev/null Thu Oct 25 13:49:30 2012 --- gcc/testsuite/gcc.target/i386/pr55448.c Thu Nov 29 15:42:16 2012 *************** *** 0 **** --- 1,26 ---- + /* { dg-do compile } */ + /* { dg-options "-O2 -mavx" } */ + + #include <immintrin.h> + + static inline __m256 add1(const __m256 *a, const __m256 *b) + { + return _mm256_add_ps(*a, *b); + } + + void foo1(__m256 *a, const __m256 b) + { + *a = add1(a, &b); + } + + static inline __m128 add2(const __m128 *a, const __m128 *b) + { + return _mm_add_ps(*a, *b); + } + + void foo2(__m128 *a, const __m128 b) + { + *a = add2(a, &b); + } + + /* { dg-final { scan-assembler-not "vmovups" } } */ *** /tmp/yTaOsa_tree-sra.c Fri Nov 30 12:26:26 2012 --- gcc/tree-sra.c Wed Nov 28 14:19:30 2012 *************** unmodified_by_ref_scalar_representative *** 3891,3902 **** return repr; } ! /* Return true iff this access precludes IPA-SRA of the parameter it is ! associated with. */ static bool ! access_precludes_ipa_sra_p (struct access *access) { /* Avoid issues such as the second simple testcase in PR 42025. The problem is incompatible assign in a call statement (and possibly even in asm statements). This can be relaxed by using a new temporary but only for --- 3891,3903 ---- return repr; } ! /* Return true iff this ACCESS precludes IPA-SRA of the parameter it is ! associated with. REQ_ALIGN is the minimum required alignment. */ static bool ! access_precludes_ipa_sra_p (struct access *access, unsigned int req_align) { + unsigned int exp_align; /* Avoid issues such as the second simple testcase in PR 42025. The problem is incompatible assign in a call statement (and possibly even in asm statements). This can be relaxed by using a new temporary but only for *************** access_precludes_ipa_sra_p (struct acces *** 3908,3913 **** --- 3909,3918 ---- || gimple_code (access->stmt) == GIMPLE_ASM)) return true; + exp_align = get_object_alignment (access->expr); + if (exp_align < req_align) + return true; + return false; } *************** splice_param_accesses (tree parm, bool * *** 3943,3949 **** tree a1_alias_type; access = (*access_vec)[i]; modification = access->write; ! if (access_precludes_ipa_sra_p (access)) return NULL; a1_alias_type = reference_alias_ptr_type (access->expr); --- 3948,3954 ---- tree a1_alias_type; access = (*access_vec)[i]; modification = access->write; ! if (access_precludes_ipa_sra_p (access, TYPE_ALIGN (access->type))) return NULL; a1_alias_type = reference_alias_ptr_type (access->expr); *************** splice_param_accesses (tree parm, bool * *** 3966,3972 **** else if (ac2->size != access->size) return NULL; ! if (access_precludes_ipa_sra_p (ac2) || (ac2->type != access->type && (TREE_ADDRESSABLE (ac2->type) || TREE_ADDRESSABLE (access->type))) --- 3971,3977 ---- else if (ac2->size != access->size) return NULL; ! if (access_precludes_ipa_sra_p (ac2, TYPE_ALIGN (access->type)) || (ac2->type != access->type && (TREE_ADDRESSABLE (ac2->type) || TREE_ADDRESSABLE (access->type)))