On Tue, Jul 9, 2019 at 12:13 AM Jakub Jelinek <ja...@redhat.com> wrote:
>
> Hi!
>
> The 4 testcases below weren't vectorized, because while
> tree-vect-data-refs.c now allows more forms of simd lane access,
> scan_operand_equal_p didn't allow combining them together.
>
> Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux,
> committed to trunk.
>
> 2019-07-08  Jakub Jelinek  <ja...@redhat.com>
>
>         * tree-vect-stmts.c (scan_operand_equal_p): Look through MEM_REF
>         with SSA_NAME address of POINTER_PLUS_EXPR.  Handle MULT_EXPR
>         and casts in offset when different, both through gimple stmts
>         and through trees.  Rewritten using loops to minimize code duplication
>         for each operand.
>
>         * g++.dg/vect/simd-6.cc: Replace xfail with target x86.
>         * g++.dg/vect/simd-9.cc: Likewise.
>
>         * testsuite/libgomp.c++/scan-13.C: Replace xfail with target x86.
>         * testsuite/libgomp.c++/scan-16.C: Likewise.
>
> --- gcc/tree-vect-stmts.c.jj    2019-07-04 09:24:28.595303590 +0200
> +++ gcc/tree-vect-stmts.c       2019-07-08 20:59:52.376285636 +0200
> @@ -6334,30 +6334,88 @@ get_group_alias_ptr_type (stmt_vec_info
>  static bool
>  scan_operand_equal_p (tree ref1, tree ref2)
>  {
> -  machine_mode mode1, mode2;
> -  poly_int64 bitsize1, bitsize2, bitpos1, bitpos2;
> -  tree offset1, offset2;
> -  int unsignedp1, unsignedp2, reversep1, reversep2;
> -  int volatilep1 = 0, volatilep2 = 0;
> -  tree base1 = get_inner_reference (ref1, &bitsize1, &bitpos1, &offset1,
> -                                   &mode1, &unsignedp1, &reversep1,
> -                                   &volatilep1);
> -  tree base2 = get_inner_reference (ref2, &bitsize2, &bitpos2, &offset2,
> -                                   &mode2, &unsignedp2, &reversep2,
> -                                   &volatilep2);
> -  if (reversep1 || reversep2 || volatilep1 || volatilep2)
> -    return false;
> -  if (!operand_equal_p (base1, base2, 0))
> -    return false;
> -  if (maybe_ne (bitpos1, 0) || maybe_ne (bitpos2, 0))
> -    return false;
> -  if (maybe_ne (bitsize1, bitsize2))
> +  tree ref[2] = { ref1, ref2 };
> +  poly_int64 bitsize[2], bitpos[2];
> +  tree offset[2], base[2];
> +  for (int i = 0; i < 2; ++i)
> +    {
> +      machine_mode mode;
> +      int unsignedp, reversep, volatilep = 0;
> +      base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
> +                                    &offset[i], &mode, &unsignedp,
> +                                    &reversep, &volatilep);
> +      if (reversep || volatilep || maybe_ne (bitpos[i], 0))
> +       return false;
> +      if (TREE_CODE (base[i]) == MEM_REF
> +         && offset[i] == NULL_TREE
> +         && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
> +       {
> +         gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
> +         if (is_gimple_assign (def_stmt)
> +             && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
> +             && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
> +             && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
> +           {
> +             if (maybe_ne (mem_ref_offset (base[i]), 0))
> +               return false;
> +             base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
> +             offset[i] = gimple_assign_rhs2 (def_stmt);
> +           }
> +       }
> +    }
> +
> +  if (!operand_equal_p (base[0], base[1], 0))
>      return false;
> -  if (offset1 != offset2
> -      && (!offset1
> -         || !offset2
> -         || !operand_equal_p (offset1, offset2, 0)))
> +  if (maybe_ne (bitsize[0], bitsize[1]))
>      return false;
> +  if (offset[0] != offset[1])
> +    {
> +      if (!offset[0] || !offset[1])
> +       return false;
> +      if (!operand_equal_p (offset[0], offset[1], 0))
> +       {
> +         tree step[2];
> +         for (int i = 0; i < 2; ++i)
> +           {
> +             step[i] = integer_one_node;
> +             if (TREE_CODE (offset[i]) == SSA_NAME)
> +               {
> +                 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
> +                 if (is_gimple_assign (def_stmt)
> +                     && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
> +                     && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
> +                         == INTEGER_CST))
> +                   {
> +                     step[i] = gimple_assign_rhs2 (def_stmt);
> +                     offset[i] = gimple_assign_rhs1 (def_stmt);
> +                   }
> +               }
> +             else if (TREE_CODE (offset[i]) == MULT_EXPR)
> +               {
> +                 step[i] = TREE_OPERAND (offset[i], 1);
> +                 offset[i] = TREE_OPERAND (offset[i], 0);
> +               }
> +             tree rhs1 = NULL_TREE;
> +             if (TREE_CODE (offset[i]) == SSA_NAME)
> +               {
> +                 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
> +                 if (gimple_assign_cast_p (def_stmt))
> +                   rhs1 = gimple_assign_rhs1 (def_stmt);
> +               }
> +             else if (CONVERT_EXPR_P (offset[i]))
> +               rhs1 = TREE_OPERAND (offset[i], 0);
> +             if (rhs1
> +                 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
> +                 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
> +                 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
> +                     >= TYPE_PRECISION (TREE_TYPE (rhs1))))
> +               offset[i] = rhs1;
> +           }
> +         if (!operand_equal_p (offset[0], offset[1], 0)
> +             || !operand_equal_p (step[0], step[1], 0))
> +           return false;

Seeing all this, it might be easier to use

  tree_to_aff_combination_expand (ref1, TREE_TYPE (ref1), &aff1...);
  tree_to_aff_combination_expand (ref2, TREE_TYPE (ref2), &aff2...);
  aff_combination_scale (&aff2, -1);
  aff_combination_add (&aff1, &aff2);
  return aff_combination_zero_p (&aff1);

where you probably need to feed it the ADDR_EXPR of ref1/ref2
(or add an address_of_tree_to_aff_combination helper doing that).

Conversions are where that might fail, though ...

Richard.


> +       }
> +    }
>    return true;
>  }
>
> --- gcc/testsuite/g++.dg/vect/simd-6.cc.jj      2019-06-21 08:47:04.176673236 +0200
> +++ gcc/testsuite/g++.dg/vect/simd-6.cc 2019-07-08 20:42:18.599409663 +0200
> @@ -1,7 +1,7 @@
>  // { dg-require-effective-target size32plus }
>  // { dg-additional-options "-fopenmp-simd" }
>  // { dg-additional-options "-mavx" { target avx_runtime } }
> -// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
> +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
>
>  #include "../../gcc.dg/vect/tree-vect.h"
>
> --- gcc/testsuite/g++.dg/vect/simd-9.cc.jj      2019-06-21 08:47:04.176673236 +0200
> +++ gcc/testsuite/g++.dg/vect/simd-9.cc 2019-07-08 20:42:33.378169789 +0200
> @@ -1,7 +1,7 @@
>  // { dg-require-effective-target size32plus }
>  // { dg-additional-options "-fopenmp-simd" }
>  // { dg-additional-options "-mavx" { target avx_runtime } }
> -// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
> +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
>
>  #include "../../gcc.dg/vect/tree-vect.h"
>
> --- libgomp/testsuite/libgomp.c++/scan-13.C.jj  2019-07-06 09:51:48.405289370 +0200
> +++ libgomp/testsuite/libgomp.c++/scan-13.C     2019-07-08 20:45:43.957076490 +0200
> @@ -1,7 +1,7 @@
>  // { dg-require-effective-target size32plus }
>  // { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" }
>  // { dg-additional-options "-mavx" { target avx_runtime } }
> -// { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { xfail *-*-* } } }
> +// { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
>
>  extern "C" void abort ();
>
> --- libgomp/testsuite/libgomp.c++/scan-16.C.jj  2019-07-06 09:51:48.406289354 +0200
> +++ libgomp/testsuite/libgomp.c++/scan-16.C     2019-07-08 20:45:56.709869498 +0200
> @@ -1,7 +1,7 @@
>  // { dg-require-effective-target size32plus }
>  // { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" }
>  // { dg-additional-options "-mavx" { target avx_runtime } }
> -// { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { xfail *-*-* } } }
> +// { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
>
>  extern "C" void abort ();
>
>
>         Jakub

Reply via email to