On Fri, Nov 30, 2012 at 2:02 PM, Martin Jambor <mjam...@suse.cz> wrote:
> On Thu, Nov 29, 2012 at 11:06:41AM +0100, Martin Jambor wrote:
>> Hi,
>>
>> thanks for the review.  When writing a reply I realized I indeed made
>> a mistake or two in the part concerning prev_base and the code was not
>> what it intended to be.  I'll re-write it today.
>
> OK, this is it.  The part in tree-sra.c, in which I make IPA-SRA punt
> when the alignment of the loads in the callee are not as big as
> alignment of the required types of the new arguments, remained the
> same.
>
> In ipa-prop.h, I now use maximum of the alignment from
> get_pointer_alignment_1 and the type alignment propagated from the
> callee.  I now also recognize cases where the dereference is buried
> below a COMPONENT_REF or ARRAY_REF (this is checked by new testcase
> ipa-sra-9.c which fails at least on on sparc64 when things go wrong).
>
> The patch has passed bootstrap and testing on x86_64-linux,
> ppc64-linux and sparc64-linux.

Works for me.

Thanks,
Richard.

> Thanks,
>
> Martin
>
>
> 2012-11-29  Martin Jambor  <mjam...@suse.cz>
>
>         PR middle-end/52890
>         PR tree-optimization/55415
>         PR tree-optimization/54386
>         PR target/55448
>         * ipa-prop.c (ipa_modify_call_arguments): Be optimistic when
>         get_pointer_alignment_1 returns false and the base was not a
>         dereference.
>         * tree-sra.c (access_precludes_ipa_sra_p): New parameter req_align,
>         added check for required alignment.  Update the user.
>
>         * testsuite/gcc.dg/ipa/ipa-sra-7.c: New test.
>         * testsuite/gcc.dg/ipa/ipa-sra-8.c: Likewise.
>         * testsuite/gcc.dg/ipa/ipa-sra-9.c: Likewise.
>         * testsuite/gcc.target/i386/pr55448.c: Likewise.
>
>
> *** /tmp/cY007b_ipa-prop.c      Fri Nov 30 12:26:26 2012
> --- gcc/ipa-prop.c      Thu Nov 29 16:05:04 2012
> *************** ipa_modify_call_arguments (struct cgraph
> *** 2888,2893 ****
> --- 2888,2895 ----
>         {
>           tree expr, base, off;
>           location_t loc;
> +         unsigned int deref_align;
> +         bool deref_base = false;
>
>           /* We create a new parameter out of the value of the old one, we can
>              do the following kind of transformations:
> *************** ipa_modify_call_arguments (struct cgraph
> *** 2920,2928 ****
>             {
>               HOST_WIDE_INT base_offset;
>               tree prev_base;
>
>               if (TREE_CODE (base) == ADDR_EXPR)
> !               base = TREE_OPERAND (base, 0);
>               prev_base = base;
>               base = get_addr_base_and_unit_offset (base, &base_offset);
>               /* Aggregate arguments can have non-invariant addresses.  */
> --- 2922,2936 ----
>             {
>               HOST_WIDE_INT base_offset;
>               tree prev_base;
> +             bool addrof;
>
>               if (TREE_CODE (base) == ADDR_EXPR)
> !               {
> !                 base = TREE_OPERAND (base, 0);
> !                 addrof = true;
> !               }
> !             else
> !               addrof = false;
>               prev_base = base;
>               base = get_addr_base_and_unit_offset (base, &base_offset);
>               /* Aggregate arguments can have non-invariant addresses.  */
> *************** ipa_modify_call_arguments (struct cgraph
> *** 2934,2939 ****
> --- 2942,2952 ----
>                 }
>               else if (TREE_CODE (base) == MEM_REF)
>                 {
> +                 if (!addrof)
> +                   {
> +                     deref_base = true;
> +                     deref_align = TYPE_ALIGN (TREE_TYPE (base));
> +                   }
>                   off = build_int_cst (adj->alias_ptr_type,
>                                        base_offset
>                                        + adj->offset / BITS_PER_UNIT);
> *************** ipa_modify_call_arguments (struct cgraph
> *** 2956,2962 ****
>               unsigned int align;
>               unsigned HOST_WIDE_INT misalign;
>
> !             get_pointer_alignment_1 (base, &align, &misalign);
>               misalign += (tree_to_double_int (off)
>                            .sext (TYPE_PRECISION (TREE_TYPE (off))).low
>                            * BITS_PER_UNIT);
> --- 2969,2985 ----
>               unsigned int align;
>               unsigned HOST_WIDE_INT misalign;
>
> !             if (deref_base)
> !               {
> !                 align = deref_align;
> !                 misalign = 0;
> !               }
> !             else
> !               {
> !                 get_pointer_alignment_1 (base, &align, &misalign);
> !                 if (TYPE_ALIGN (type) > align)
> !                   align = TYPE_ALIGN (type);
> !               }
>               misalign += (tree_to_double_int (off)
>                            .sext (TYPE_PRECISION (TREE_TYPE (off))).low
>                            * BITS_PER_UNIT);
> *** /dev/null   Thu Oct 25 13:49:30 2012
> --- gcc/testsuite/gcc.dg/ipa/ipa-sra-7.c        Wed Nov 28 14:19:30 2012
> ***************
> *** 0 ****
> --- 1,42 ----
> + /* { dg-do run } */
> + /* { dg-options "-O3" } */
> +
> + typedef unsigned int myint __attribute__((aligned(1)));
> +
> + typedef struct __attribute__((packed)) S {
> +   unsigned a, b, c;
> + } SS;
> +
> + typedef SS __attribute__((aligned(1))) SSS;
> +
> +
> + static unsigned int __attribute__ ((noinline))
> + get_a (SSS *p)
> + {
> +   return p->a;
> + };
> +
> + static int __attribute__ ((noinline, noclone))
> + foo (SS *p)
> + {
> +   int r = (int) get_a(p) + 2;
> +   return r;
> + }
> +
> + char buf[512];
> +
> + static SSS * __attribute__ ((noinline, noclone))
> + get_sss (void)
> + {
> +   return (SSS *)(buf + 1);
> + }
> +
> +
> + int
> + main(int argc, char *argv[])
> + {
> +   SSS *p = get_sss();
> +   if (foo(p) != 2)
> +     __builtin_abort ();
> +   return 0;
> + }
> *** /dev/null   Thu Oct 25 13:49:30 2012
> --- gcc/testsuite/gcc.dg/ipa/ipa-sra-8.c        Fri Nov 30 00:52:03 2012
> ***************
> *** 0 ****
> --- 1,41 ----
> + /* { dg-do run } */
> + /* { dg-options "-O3" } */
> +
> + typedef unsigned int myint __attribute__((aligned(1)));
> +
> + typedef struct S {
> +   unsigned a, b, c;
> + } SS;
> +
> + typedef SS __attribute__((aligned(1))) SSS;
> +
> +
> + static unsigned int __attribute__ ((noinline))
> + get_a (SS s)
> + {
> +   return s.a;
> + };
> +
> + static int __attribute__ ((noinline, noclone))
> + foo (SSS *p)
> + {
> +   int r = (int) get_a(*p) + 2;
> +   return r;
> + }
> +
> + char buf[512];
> +
> + static SSS * __attribute__ ((noinline, noclone))
> + get_sss (void)
> + {
> +   return (SSS *)(buf + 1);
> + }
> +
> + int
> + main(int argc, char *argv[])
> + {
> +   SSS *p = get_sss();
> +   if (foo(p) != 2)
> +     __builtin_abort ();
> +   return 0;
> + }
> *** /dev/null   Thu Oct 25 13:49:30 2012
> --- gcc/testsuite/gcc.dg/ipa/ipa-sra-9.c        Fri Nov 30 00:52:12 2012
> ***************
> *** 0 ****
> --- 1,44 ----
> + /* { dg-do run } */
> + /* { dg-options "-O3" } */
> +
> + typedef unsigned int myint __attribute__((aligned(1)));
> +
> + typedef struct S {
> +   unsigned a, b, c;
> + } SS;
> +
> + typedef struct U {
> +   SS s[2];
> + } UU;
> +
> + typedef UU __attribute__((aligned(1))) UUU;
> +
> + static unsigned int __attribute__ ((noinline))
> + get_a (SS s)
> + {
> +   return s.a;
> + };
> +
> + static int __attribute__ ((noinline, noclone))
> + foo (UUU *p)
> + {
> +   int r = (int) get_a(p->s[0]) + 2;
> +   return r;
> + }
> +
> + char buf[512];
> +
> + static UUU * __attribute__ ((noinline, noclone))
> + get_uuu (void)
> + {
> +   return (UUU *)(buf + 1);
> + }
> +
> + int
> + main(int argc, char *argv[])
> + {
> +   UUU *p = get_uuu();
> +   if (foo(p) != 2)
> +     __builtin_abort ();
> +   return 0;
> + }
> *** /dev/null   Thu Oct 25 13:49:30 2012
> --- gcc/testsuite/gcc.target/i386/pr55448.c     Thu Nov 29 15:42:16 2012
> ***************
> *** 0 ****
> --- 1,26 ----
> + /* { dg-do compile } */
> + /* { dg-options "-O2 -mavx" } */
> +
> + #include <immintrin.h>
> +
> + static inline __m256 add1(const __m256 *a, const __m256 *b)
> + {
> +   return _mm256_add_ps(*a, *b);
> + }
> +
> + void foo1(__m256 *a, const __m256 b)
> + {
> +   *a = add1(a, &b);
> + }
> +
> + static inline __m128 add2(const __m128 *a, const __m128 *b)
> + {
> +   return _mm_add_ps(*a, *b);
> + }
> +
> + void foo2(__m128 *a, const __m128 b)
> + {
> +   *a = add2(a, &b);
> + }
> +
> + /* { dg-final { scan-assembler-not "vmovups" } } */
> *** /tmp/yTaOsa_tree-sra.c      Fri Nov 30 12:26:26 2012
> --- gcc/tree-sra.c      Wed Nov 28 14:19:30 2012
> *************** unmodified_by_ref_scalar_representative
> *** 3891,3902 ****
>     return repr;
>   }
>
> ! /* Return true iff this access precludes IPA-SRA of the parameter it is
> !    associated with. */
>
>   static bool
> ! access_precludes_ipa_sra_p (struct access *access)
>   {
>     /* Avoid issues such as the second simple testcase in PR 42025.  The 
> problem
>        is incompatible assign in a call statement (and possibly even in asm
>        statements).  This can be relaxed by using a new temporary but only for
> --- 3891,3903 ----
>     return repr;
>   }
>
> ! /* Return true iff this ACCESS precludes IPA-SRA of the parameter it is
> !    associated with.  REQ_ALIGN is the minimum required alignment.  */
>
>   static bool
> ! access_precludes_ipa_sra_p (struct access *access, unsigned int req_align)
>   {
> +   unsigned int exp_align;
>     /* Avoid issues such as the second simple testcase in PR 42025.  The 
> problem
>        is incompatible assign in a call statement (and possibly even in asm
>        statements).  This can be relaxed by using a new temporary but only for
> *************** access_precludes_ipa_sra_p (struct acces
> *** 3908,3913 ****
> --- 3909,3918 ----
>           || gimple_code (access->stmt) == GIMPLE_ASM))
>       return true;
>
> +   exp_align = get_object_alignment (access->expr);
> +   if (exp_align < req_align)
> +     return true;
> +
>     return false;
>   }
>
> *************** splice_param_accesses (tree parm, bool *
> *** 3943,3949 ****
>         tree a1_alias_type;
>         access = (*access_vec)[i];
>         modification = access->write;
> !       if (access_precludes_ipa_sra_p (access))
>         return NULL;
>         a1_alias_type = reference_alias_ptr_type (access->expr);
>
> --- 3948,3954 ----
>         tree a1_alias_type;
>         access = (*access_vec)[i];
>         modification = access->write;
> !       if (access_precludes_ipa_sra_p (access, TYPE_ALIGN (access->type)))
>         return NULL;
>         a1_alias_type = reference_alias_ptr_type (access->expr);
>
> *************** splice_param_accesses (tree parm, bool *
> *** 3966,3972 ****
>           else if (ac2->size != access->size)
>             return NULL;
>
> !         if (access_precludes_ipa_sra_p (ac2)
>               || (ac2->type != access->type
>                   && (TREE_ADDRESSABLE (ac2->type)
>                       || TREE_ADDRESSABLE (access->type)))
> --- 3971,3977 ----
>           else if (ac2->size != access->size)
>             return NULL;
>
> !         if (access_precludes_ipa_sra_p (ac2, TYPE_ALIGN (access->type))
>               || (ac2->type != access->type
>                   && (TREE_ADDRESSABLE (ac2->type)
>                       || TREE_ADDRESSABLE (access->type)))

Reply via email to