On Wed, 27 May 2015, Kyrill Tkachov wrote:

> Hi Richard,
> 
> On 26/05/15 14:54, Richard Biener wrote:
> > The following fixes the testcase in PR66142
> > 
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
> > 
> > Richard.
> > 
> > 2015-05-26  Richard Biener  <rguent...@suse.de>
> > 
> >     PR tree-optimization/66142
> >     * tree-ssa-sccvn.c (vn_reference_lookup_3): Manually compare
> >     MEM_REFs for the same base address.
> > 
> >     * gcc.dg/tree-ssa/ssa-fre-44.c: New testcase.
> > 
> > Index: gcc/tree-ssa-sccvn.c
> > ===================================================================
> > --- gcc/tree-ssa-sccvn.c    (revision 223574)
> > +++ gcc/tree-ssa-sccvn.c    (working copy)
> > @@ -1894,7 +1894,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree
> >         size2 = lhs_ref.size;
> >         maxsize2 = lhs_ref.max_size;
> >         if (maxsize2 == -1
> > -     || (base != base2 && !operand_equal_p (base, base2, 0))
> > +     || (base != base2
> > +         && (TREE_CODE (base) != MEM_REF
> > +             || TREE_CODE (base2) != MEM_REF
> > +             || TREE_OPERAND (base, 0) != TREE_OPERAND (base2, 0)
> > +             || !tree_int_cst_equal (TREE_OPERAND (base, 1),
> > +                                     TREE_OPERAND (base2, 1))))
> >       || offset2 > offset
> >       || offset2 + size2 < offset + maxsize)
> >     return (void *)-1;
> > Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> > ===================================================================
> > --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c      (revision 0)
> > +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c      (working copy)
> > @@ -0,0 +1,62 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O -fdump-tree-fre1" } */
> > +
> > +struct A { float x, y; };
> > +struct B { struct A u; };
> > +void bar (struct A *);
> > +
> > +float
> > +f1 (struct B *x, int y)
> > +{
> > +  struct A p;
> > +  p.x = 1.0f;
> > +  p.y = 2.0f;
> > +  struct A *q = &x[y].u;
> > +  *q = p;
> > +  float f = x[y].u.x + x[y].u.y;
> > +  bar (&p);
> > +  return f;
> > +}
> > +
> > +float
> > +f2 (struct B *x, int y)
> > +{
> > +  struct A p;
> > +  p.x = 1.0f;
> > +  p.y = 2.0f;
> > +  x[y].u = p;
> > +  float f = x[y].u.x + x[y].u.y;
> > +  bar (&p);
> > +  return f;
> > +}
> > +
> > +float
> > +f3 (struct B *x, int y)
> > +{
> > +  struct A p;
> > +  p.x = 1.0f;
> > +  p.y = 2.0f;
> > +  struct A *q = &x[y].u;
> > +  __builtin_memcpy (&q->x, &p.x, sizeof (float));
> > +  __builtin_memcpy (&q->y, &p.y, sizeof (float));
> > +  *q = p;
> > +  float f = x[y].u.x + x[y].u.y;
> > +  bar (&p);
> > +  return f;
> > +}
> > +
> > +float
> > +f4 (struct B *x, int y)
> > +{
> > +  struct A p;
> > +  p.x = 1.0f;
> > +  p.y = 2.0f;
> > +  __builtin_memcpy (&x[y].u.x, &p.x, sizeof (float));
> > +  __builtin_memcpy (&x[y].u.y, &p.y, sizeof (float));
> > +  float f = x[y].u.x + x[y].u.y;
> > +  bar (&p);
> > +  return f;
> > +}
> 
> I see this test failing on arm-none-eabi. In particular, the f4 dump is the
> only one that doesn't contain "return 3.0". Instead it is:
> f4 (struct B * x, int y)
> {
>   float f;
>   struct A p;
>   unsigned int y.3_5;
>   unsigned int _6;
>   struct B * _8;
>   float * _9;
>   float * _14;
>   float _19;
>   float _23;
> 
>   <bb 2>:
>   p.x = 1.0e+0;
>   p.y = 2.0e+0;
>   y.3_5 = (unsigned int) y_4(D);
>   _6 = y.3_5 * 8;
>   _8 = x_7(D) + _6;
>   _9 = &_8->u.x;
>   __builtin_memcpy (_9, &p.x, 4);
>   _14 = &_8->u.y;
>   __builtin_memcpy (_14, &p.y, 4);
>   _19 = _8->u.x;
>   _23 = _8->u.y;
>   f_24 = _19 + _23;
>   bar (&p);
>   p ={v} {CLOBBER};
>   return f_24;
> 
> }
> 
> Thanks,
> Kyrill

Thanks - the following patch fixes this (tested with a cross compiler).
It also removes a spurious aggregate assignment from f3; with that
assignment gone, f3 fails without the patch as well (as expected).

Bootstrap / regtest in progress on x86_64-unknown-linux-gnu.

Richard.

2015-05-28  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/66142
        * tree-ssa-sccvn.c (vn_reference_lookup_3): Handle non-GIMPLE
        values better in memcpy destination handling.  Handle non-aliasing
        we discover here.

        * gcc.dg/tree-ssa/ssa-fre-44.c: Fixup.

Index: gcc/tree-ssa-sccvn.c
===================================================================
*** gcc/tree-ssa-sccvn.c        (revision 223802)
--- gcc/tree-ssa-sccvn.c        (working copy)
*************** vn_reference_lookup_3 (ao_ref *ref, tree
*** 2028,2034 ****
        lhs = gimple_call_arg (def_stmt, 0);
        lhs_offset = 0;
        if (TREE_CODE (lhs) == SSA_NAME)
!       lhs = SSA_VAL (lhs);
        if (TREE_CODE (lhs) == ADDR_EXPR)
        {
          tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (lhs, 0),
--- 2028,2043 ----
        lhs = gimple_call_arg (def_stmt, 0);
        lhs_offset = 0;
        if (TREE_CODE (lhs) == SSA_NAME)
!       {
!         lhs = SSA_VAL (lhs);
!         if (TREE_CODE (lhs) == SSA_NAME)
!           {
!             gimple def_stmt = SSA_NAME_DEF_STMT (lhs);
!             if (gimple_assign_single_p (def_stmt)
!                 && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
!               lhs = gimple_assign_rhs1 (def_stmt);
!           }
!       }
        if (TREE_CODE (lhs) == ADDR_EXPR)
        {
          tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (lhs, 0),
*************** vn_reference_lookup_3 (ao_ref *ref, tree
*** 2039,2044 ****
--- 2048,2055 ----
              && tree_fits_uhwi_p (TREE_OPERAND (tem, 1)))
            {
              lhs = TREE_OPERAND (tem, 0);
+             if (TREE_CODE (lhs) == SSA_NAME)
+               lhs = SSA_VAL (lhs);
              lhs_offset += tree_to_uhwi (TREE_OPERAND (tem, 1));
            }
          else if (DECL_P (tem))
*************** vn_reference_lookup_3 (ao_ref *ref, tree
*** 2089,2098 ****
                  || TREE_OPERAND (lhs, 0) != base)))
        return (void *)-1;
  
-       /* And the access has to be contained within the memcpy destination.  */
        at = offset / BITS_PER_UNIT;
        if (TREE_CODE (base) == MEM_REF)
        at += tree_to_uhwi (TREE_OPERAND (base, 1));
        if (lhs_offset > at
          || lhs_offset + copy_size < at + maxsize / BITS_PER_UNIT)
        return (void *)-1;
--- 2100,2114 ----
                  || TREE_OPERAND (lhs, 0) != base)))
        return (void *)-1;
  
        at = offset / BITS_PER_UNIT;
        if (TREE_CODE (base) == MEM_REF)
        at += tree_to_uhwi (TREE_OPERAND (base, 1));
+       /* If the access is completely outside of the memcpy destination
+        area there is no aliasing.  */
+       if (lhs_offset >= at + maxsize / BITS_PER_UNIT
+         || lhs_offset + copy_size <= at)
+       return NULL;
+       /* And the access has to be contained within the memcpy destination.  */
        if (lhs_offset > at
          || lhs_offset + copy_size < at + maxsize / BITS_PER_UNIT)
        return (void *)-1;
Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c  (revision 223802)
--- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c  (working copy)
*************** f3 (struct B *x, int y)
*** 39,45 ****
    struct A *q = &x[y].u;
    __builtin_memcpy (&q->x, &p.x, sizeof (float));
    __builtin_memcpy (&q->y, &p.y, sizeof (float));
-   *q = p;
    float f = x[y].u.x + x[y].u.y;
    bar (&p);
    return f;
--- 39,44 ----

Reply via email to