On Fri, Nov 30, 2012 at 2:02 PM, Martin Jambor <mjam...@suse.cz> wrote: > On Thu, Nov 29, 2012 at 11:06:41AM +0100, Martin Jambor wrote: >> Hi, >> >> thanks for the review. When writing a reply I realized I indeed made >> a mistake or two in the part concerning prev_base and the code was not >> what it intended to be. I'll re-write it today. > > OK, this is it. The part in tree-sra.c, in which I make IPA-SRA punt > when the alignment of the loads in the callee are not as big as > alignment of the required types of the new arguments, remained the > same. > > In ipa-prop.h, I now use maximum of the alignment from > get_pointer_alignment_1 and the type alignment propagated from the > callee. I now also recognize cases where the dereference is buried > below a COMPONENT_REF or ARRAY_REF (this is checked by new testcase > ipa-sra-9.c which fails at least on on sparc64 when things go wrong). > > The patch has passed bootstrap and testing on x86_64-linux, > ppc64-linux and sparc64-linux.
Works for me. Thanks, Richard. > Thanks, > > Martin > > > 2012-11-29 Martin Jambor <mjam...@suse.cz> > > PR middle-end/52890 > PR tree-optimization/55415 > PR tree-optimization/54386 > PR target/55448 > * ipa-prop.c (ipa_modify_call_arguments): Be optimistic when > get_pointer_alignment_1 returns false and the base was not a > dereference. > * tree-sra.c (access_precludes_ipa_sra_p): New parameter req_align, > added check for required alignment. Update the user. > > * testsuite/gcc.dg/ipa/ipa-sra-7.c: New test. > * testsuite/gcc.dg/ipa/ipa-sra-8.c: Likewise. > * testsuite/gcc.dg/ipa/ipa-sra-9.c: Likewise. > * testsuite/gcc.target/i386/pr55448.c: Likewise. > > > *** /tmp/cY007b_ipa-prop.c Fri Nov 30 12:26:26 2012 > --- gcc/ipa-prop.c Thu Nov 29 16:05:04 2012 > *************** ipa_modify_call_arguments (struct cgraph > *** 2888,2893 **** > --- 2888,2895 ---- > { > tree expr, base, off; > location_t loc; > + unsigned int deref_align; > + bool deref_base = false; > > /* We create a new parameter out of the value of the old one, we can > do the following kind of transformations: > *************** ipa_modify_call_arguments (struct cgraph > *** 2920,2928 **** > { > HOST_WIDE_INT base_offset; > tree prev_base; > > if (TREE_CODE (base) == ADDR_EXPR) > ! base = TREE_OPERAND (base, 0); > prev_base = base; > base = get_addr_base_and_unit_offset (base, &base_offset); > /* Aggregate arguments can have non-invariant addresses. */ > --- 2922,2936 ---- > { > HOST_WIDE_INT base_offset; > tree prev_base; > + bool addrof; > > if (TREE_CODE (base) == ADDR_EXPR) > ! { > ! base = TREE_OPERAND (base, 0); > ! addrof = true; > ! } > ! else > ! addrof = false; > prev_base = base; > base = get_addr_base_and_unit_offset (base, &base_offset); > /* Aggregate arguments can have non-invariant addresses. */ > *************** ipa_modify_call_arguments (struct cgraph > *** 2934,2939 **** > --- 2942,2952 ---- > } > else if (TREE_CODE (base) == MEM_REF) > { > + if (!addrof) > + { > + deref_base = true; > + deref_align = TYPE_ALIGN (TREE_TYPE (base)); > + } > off = build_int_cst (adj->alias_ptr_type, > base_offset > + adj->offset / BITS_PER_UNIT); > *************** ipa_modify_call_arguments (struct cgraph > *** 2956,2962 **** > unsigned int align; > unsigned HOST_WIDE_INT misalign; > > ! get_pointer_alignment_1 (base, &align, &misalign); > misalign += (tree_to_double_int (off) > .sext (TYPE_PRECISION (TREE_TYPE (off))).low > * BITS_PER_UNIT); > --- 2969,2985 ---- > unsigned int align; > unsigned HOST_WIDE_INT misalign; > > ! if (deref_base) > ! { > ! align = deref_align; > ! misalign = 0; > ! } > ! else > ! { > ! get_pointer_alignment_1 (base, &align, &misalign); > ! if (TYPE_ALIGN (type) > align) > ! align = TYPE_ALIGN (type); > ! } > misalign += (tree_to_double_int (off) > .sext (TYPE_PRECISION (TREE_TYPE (off))).low > * BITS_PER_UNIT); > *** /dev/null Thu Oct 25 13:49:30 2012 > --- gcc/testsuite/gcc.dg/ipa/ipa-sra-7.c Wed Nov 28 14:19:30 2012 > *************** > *** 0 **** > --- 1,42 ---- > + /* { dg-do run } */ > + /* { dg-options "-O3" } */ > + > + typedef unsigned int myint __attribute__((aligned(1))); > + > + typedef struct __attribute__((packed)) S { > + unsigned a, b, c; > + } SS; > + > + typedef SS __attribute__((aligned(1))) SSS; > + > + > + static unsigned int __attribute__ ((noinline)) > + get_a (SSS *p) > + { > + return p->a; > + }; > + > + static int __attribute__ ((noinline, noclone)) > + foo (SS *p) > + { > + int r = (int) get_a(p) + 2; > + return r; > + } > + > + char buf[512]; > + > + static SSS * __attribute__ ((noinline, noclone)) > + get_sss (void) > + { > + return (SSS *)(buf + 1); > + } > + > + > + int > + main(int argc, char *argv[]) > + { > + SSS *p = get_sss(); > + if (foo(p) != 2) > + __builtin_abort (); > + return 0; > + } > *** /dev/null Thu Oct 25 13:49:30 2012 > --- gcc/testsuite/gcc.dg/ipa/ipa-sra-8.c Fri Nov 30 00:52:03 2012 > *************** > *** 0 **** > --- 1,41 ---- > + /* { dg-do run } */ > + /* { dg-options "-O3" } */ > + > + typedef unsigned int myint __attribute__((aligned(1))); > + > + typedef struct S { > + unsigned a, b, c; > + } SS; > + > + typedef SS __attribute__((aligned(1))) SSS; > + > + > + static unsigned int __attribute__ ((noinline)) > + get_a (SS s) > + { > + return s.a; > + }; > + > + static int __attribute__ ((noinline, noclone)) > + foo (SSS *p) > + { > + int r = (int) get_a(*p) + 2; > + return r; > + } > + > + char buf[512]; > + > + static SSS * __attribute__ ((noinline, noclone)) > + get_sss (void) > + { > + return (SSS *)(buf + 1); > + } > + > + int > + main(int argc, char *argv[]) > + { > + SSS *p = get_sss(); > + if (foo(p) != 2) > + __builtin_abort (); > + return 0; > + } > *** /dev/null Thu Oct 25 13:49:30 2012 > --- gcc/testsuite/gcc.dg/ipa/ipa-sra-9.c Fri Nov 30 00:52:12 2012 > *************** > *** 0 **** > --- 1,44 ---- > + /* { dg-do run } */ > + /* { dg-options "-O3" } */ > + > + typedef unsigned int myint __attribute__((aligned(1))); > + > + typedef struct S { > + unsigned a, b, c; > + } SS; > + > + typedef struct U { > + SS s[2]; > + } UU; > + > + typedef UU __attribute__((aligned(1))) UUU; > + > + static unsigned int __attribute__ ((noinline)) > + get_a (SS s) > + { > + return s.a; > + }; > + > + static int __attribute__ ((noinline, noclone)) > + foo (UUU *p) > + { > + int r = (int) get_a(p->s[0]) + 2; > + return r; > + } > + > + char buf[512]; > + > + static UUU * __attribute__ ((noinline, noclone)) > + get_uuu (void) > + { > + return (UUU *)(buf + 1); > + } > + > + int > + main(int argc, char *argv[]) > + { > + UUU *p = get_uuu(); > + if (foo(p) != 2) > + __builtin_abort (); > + return 0; > + } > *** /dev/null Thu Oct 25 13:49:30 2012 > --- gcc/testsuite/gcc.target/i386/pr55448.c Thu Nov 29 15:42:16 2012 > *************** > *** 0 **** > --- 1,26 ---- > + /* { dg-do compile } */ > + /* { dg-options "-O2 -mavx" } */ > + > + #include <immintrin.h> > + > + static inline __m256 add1(const __m256 *a, const __m256 *b) > + { > + return _mm256_add_ps(*a, *b); > + } > + > + void foo1(__m256 *a, const __m256 b) > + { > + *a = add1(a, &b); > + } > + > + static inline __m128 add2(const __m128 *a, const __m128 *b) > + { > + return _mm_add_ps(*a, *b); > + } > + > + void foo2(__m128 *a, const __m128 b) > + { > + *a = add2(a, &b); > + } > + > + /* { dg-final { scan-assembler-not "vmovups" } } */ > *** /tmp/yTaOsa_tree-sra.c Fri Nov 30 12:26:26 2012 > --- gcc/tree-sra.c Wed Nov 28 14:19:30 2012 > *************** unmodified_by_ref_scalar_representative > *** 3891,3902 **** > return repr; > } > > ! /* Return true iff this access precludes IPA-SRA of the parameter it is > ! associated with. */ > > static bool > ! access_precludes_ipa_sra_p (struct access *access) > { > /* Avoid issues such as the second simple testcase in PR 42025. The > problem > is incompatible assign in a call statement (and possibly even in asm > statements). This can be relaxed by using a new temporary but only for > --- 3891,3903 ---- > return repr; > } > > ! /* Return true iff this ACCESS precludes IPA-SRA of the parameter it is > ! associated with. REQ_ALIGN is the minimum required alignment. */ > > static bool > ! access_precludes_ipa_sra_p (struct access *access, unsigned int req_align) > { > + unsigned int exp_align; > /* Avoid issues such as the second simple testcase in PR 42025. The > problem > is incompatible assign in a call statement (and possibly even in asm > statements). This can be relaxed by using a new temporary but only for > *************** access_precludes_ipa_sra_p (struct acces > *** 3908,3913 **** > --- 3909,3918 ---- > || gimple_code (access->stmt) == GIMPLE_ASM)) > return true; > > + exp_align = get_object_alignment (access->expr); > + if (exp_align < req_align) > + return true; > + > return false; > } > > *************** splice_param_accesses (tree parm, bool * > *** 3943,3949 **** > tree a1_alias_type; > access = (*access_vec)[i]; > modification = access->write; > ! if (access_precludes_ipa_sra_p (access)) > return NULL; > a1_alias_type = reference_alias_ptr_type (access->expr); > > --- 3948,3954 ---- > tree a1_alias_type; > access = (*access_vec)[i]; > modification = access->write; > ! if (access_precludes_ipa_sra_p (access, TYPE_ALIGN (access->type))) > return NULL; > a1_alias_type = reference_alias_ptr_type (access->expr); > > *************** splice_param_accesses (tree parm, bool * > *** 3966,3972 **** > else if (ac2->size != access->size) > return NULL; > > ! if (access_precludes_ipa_sra_p (ac2) > || (ac2->type != access->type > && (TREE_ADDRESSABLE (ac2->type) > || TREE_ADDRESSABLE (access->type))) > --- 3971,3977 ---- > else if (ac2->size != access->size) > return NULL; > > ! if (access_precludes_ipa_sra_p (ac2, TYPE_ALIGN (access->type)) > || (ac2->type != access->type > && (TREE_ADDRESSABLE (ac2->type) > || TREE_ADDRESSABLE (access->type)))