When making the SLP tree a graph I messed up and in some cases we can
fail to propagate a higher max_nunits upwards when re-using a subtree.
For the testcase, but only on the gcc-9-branch, this causes us to
miss a higher value of 4 completely since it comes from a
single widening stmt which we remember but throw out of the window
due to an early mismatch fixable by commutating.

I hope TYPE_VECTOR_SUBPARTS are all ordered, at least aarch64-sve.exp
is clean as it gets with a cc1 cross (it doesn't seem to find
gcc/include/stdint.h so has more compile-time errors than necessary...).

Richard, is that an OK-ish assumption?

For dumping I simply use estimated_poly_value (didn't find a
format for dump_printf_loc suitable for poly_uint64).

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Will apply unless you find issues with the patch.

Thanks,
Richard.

2019-08-28  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/91568
        * tree-vectorizer.h (_slp_tree::max_nunits): Add.
        * tree-vect-slp.c (vect_create_new_slp_node): Initialize it.
        (vect_build_slp_tree): Record max_nunits into the subtree
        and merge it upwards.
        (vect_print_slp_tree): Print max_nunits.

        * gfortran.dg/pr91568.f: New testcase.

Index: gcc/testsuite/gfortran.dg/pr91568.f
===================================================================
--- gcc/testsuite/gfortran.dg/pr91568.f (nonexistent)
+++ gcc/testsuite/gfortran.dg/pr91568.f (working copy)
@@ -0,0 +1,11 @@
+! { dg-do compile }
+! { dg-options "-Ofast" }
+      subroutine h3dall(z,hvec,hder,nterms)
+      complex *16 hvec(0:1),hder(0:1)
+      complex *16 z,zinv,ztmp/1.0/
+      zinv=1.0/z
+      do i=1,nterms
+         ztmp=zinv*i
+         hder(i)=hvec(i-1)-ztmp*hvec(i)
+      enddo
+      end
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c (revision 274983)
+++ gcc/tree-vect-slp.c (working copy)
@@ -129,6 +129,7 @@ vect_create_new_slp_node (vec<stmt_vec_i
   SLP_TREE_TWO_OPERATORS (node) = false;
   SLP_TREE_DEF_TYPE (node) = vect_internal_def;
   node->refcnt = 1;
+  node->max_nunits = 1;
 
   unsigned i;
   FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
@@ -1051,15 +1052,24 @@ vect_build_slp_tree (vec_info *vinfo,
        dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
                         *leader ? "" : "failed ", *leader);
       if (*leader)
-       (*leader)->refcnt++;
+       {
+         (*leader)->refcnt++;
+         *max_nunits = ordered_max ((*leader)->max_nunits, *max_nunits);
+       }
       return *leader;
     }
-  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
+  poly_uint64 this_max_nunits = 1;
+  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
+                                       &this_max_nunits,
                                        matches, npermutes, tree_size,
                                        max_tree_size, bst_map);
-  /* Keep a reference for the bst_map use.  */
   if (res)
-    res->refcnt++;
+    {
+      res->max_nunits = this_max_nunits;
+      *max_nunits = ordered_max (this_max_nunits, *max_nunits);
+      /* Keep a reference for the bst_map use.  */
+      res->refcnt++;
+    }
   bst_map->put (stmts.copy (), res);
   return res;
 }
@@ -1463,9 +1473,10 @@ vect_print_slp_tree (dump_flags_t dump_k
 
   dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
   dump_user_location_t user_loc = loc.get_user_location ();
-  dump_printf_loc (metadata, user_loc, "node%s %p\n",
+  dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u)\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
-                  ? " (external)" : "", node);
+                  ? " (external)" : "", node,
+                  estimated_poly_value (node->max_nunits));
   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
     dump_printf_loc (metadata, user_loc, "\tstmt %d %G", i, stmt_info->stmt);
   if (SLP_TREE_CHILDREN (node).is_empty ())
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h       (revision 274983)
+++ gcc/tree-vectorizer.h       (working copy)
@@ -132,6 +132,9 @@ struct _slp_tree {
   unsigned int vec_stmts_size;
   /* Reference count in the SLP graph.  */
   unsigned int refcnt;
+  /* The maximum number of vector elements for the subtree rooted
+     at this node.  */
+  poly_uint64 max_nunits;
   /* Whether the scalar computations use two different operators.  */
   bool two_operators;
   /* The DEF type of this node.  */

Reply via email to