502.gcc_r when built with -fprofile-generate exposes a SLP discovery
issue where an IV forced live due to early break is not properly
discovered if its latch def is part of a different IV's SSA cycle.
To mitigate this we have to make sure to create an SLP instance
for the original IV.  Ideally we'd handle all vect_induction_def
the same but this is left for next stage1.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

        PR tree-optimization/118852
        * tree-vect-slp.cc (vect_analyze_slp): For early-break
        forced-live IVs make sure we create an appropriate
        entry into the SLP graph.

        * gcc.dg/vect/pr118852.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr118852.c | 109 +++++++++++++++++++++++++++
 gcc/tree-vect-slp.cc                 |  24 +++++-
 2 files changed, 130 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr118852.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr118852.c b/gcc/testsuite/gcc.dg/vect/pr118852.c
new file mode 100644
index 00000000000..71bfb450f7c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr118852.c
@@ -0,0 +1,109 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-profiling "-fprofile-generate" } */
+/* { dg-additional-options "-fallow-store-data-races -fprofile-arcs" } */
+/* { dg-additional-options "-msse4.2 -mprefer-vector-width=128" { target { x86_64-*-* i?86-*-* } } } */
+
+#include "tree-vect.h"
+
+typedef unsigned int hashval_t;
+struct htab {
+  void ** entries;
+  unsigned long size;
+};
+typedef struct htab *htab_t;
+unsigned long htab_size (htab_t h)
+{
+  return h->size;
+}
+typedef struct
+{
+  htab_t htab;
+  void * *slot;
+  void * *limit;
+} htab_iterator;
+
+static inline void *
+first_htab_element (htab_iterator *hti, htab_t table)
+{
+  hti->htab = table;
+  hti->slot = table->entries;
+  hti->limit = hti->slot + htab_size (table);
+  do
+    {
+      void * x = *(hti->slot);
+      if (x != ((void *) 0) && x != ((void *) 1))
+ break;
+    } while (++(hti->slot) < hti->limit);
+
+  if (hti->slot < hti->limit)
+    return *(hti->slot);
+  return 
+        ((void *)0)
+            ;
+}
+
+static inline unsigned char
+end_htab_p (const htab_iterator *hti)
+{
+  if (hti->slot >= hti->limit)
+    return 1;
+  return 0;
+}
+
+static inline void *
+next_htab_element (htab_iterator *hti)
+{
+  while (++(hti->slot) < hti->limit)
+    {
+      void * x = *(hti->slot);
+      if (x != ((void *) 0) && x != ((void *) 1))
+ return x;
+    }
+  return 
+        ((void *)0)
+            ;
+}
+
+typedef unsigned long vn_nary_op_t;
+
+typedef struct vn_tables_s
+{
+  htab_t nary;
+} *vn_tables_t;
+
+vn_tables_t valid_info;
+
+void __attribute__((noipa))
+announce (vn_nary_op_t p)
+{
+  static vn_nary_op_t prev = 0;
+  if (prev == 0x70904f0 && p != 0x70904c0)
+    __builtin_abort ();
+  prev = p;
+}
+
+void __attribute__((noipa))
+set_hashtable_value_ids_1 (void)
+{
+  htab_iterator hi;
+  vn_nary_op_t vno;
+  for (vno = (vn_nary_op_t) first_htab_element (&(hi), (valid_info->nary)); !end_htab_p (&(hi)); vno = (vn_nary_op_t) next_htab_element (&(hi)))
+    announce (vno);
+}
+
+int main()
+{
+  if (sizeof (void *) != sizeof (vn_nary_op_t))
+    return 0;
+  check_vect ();
+  valid_info = __builtin_malloc (sizeof (struct vn_tables_s));
+  valid_info->nary = __builtin_malloc (sizeof (struct htab));
+  valid_info->nary->entries = __builtin_malloc (sizeof (void *) * 32);
+  valid_info->nary->size = 31;
+  static vn_nary_op_t x[] = { 0x70905e0, 0x0, 0x0, 0x7090610, 0x7090550, 0x7090400, 0x70903a0, 0x0, 
+      0x0, 0x70904f0, 0x0, 0x0, 0x0, 0x0, 0x70904c0, 0x7090520, 0x7090460, 
+      0x7090490, 0x7090430, 0x0, 0x0, 0x0, 0x7090640, 0x0, 0x0, 0x70903d0, 0x0, 
+      0x7090580, 0x0, 0x0, 0x70905b0};
+  __builtin_memcpy (valid_info->nary->entries, x, sizeof (void *) * 31);
+  set_hashtable_value_ids_1 ();
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 8ed746ea5a9..9e09f8e980b 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -5031,15 +5031,33 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
            vec<stmt_vec_info> stmts;
            vec<stmt_vec_info> roots = vNULL;
            vec<tree> remain = vNULL;
-           gphi *lc_phi = as_a<gphi *> (STMT_VINFO_STMT (stmt_info));
-           tree def = gimple_phi_arg_def_from_edge (lc_phi, latch_e);
-           stmt_vec_info lc_info = loop_vinfo->lookup_def (def);
+           gphi *phi = as_a<gphi *> (STMT_VINFO_STMT (stmt_info));
            stmts.create (1);
+           tree def = gimple_phi_arg_def_from_edge (phi, latch_e);
+           stmt_vec_info lc_info = loop_vinfo->lookup_def (def);
            stmts.quick_push (vect_stmt_to_vectorize (lc_info));
            vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
                                     stmts, roots, remain,
                                     max_tree_size, &limit,
                                     bst_map, NULL, force_single_lane);
+           /* When the latch def is from a different cycle this can only
+              be an induction.  Build a simple instance for this.
+              ???  We should be able to start discovery from the PHI
+              for all inductions, but then there will be stray
+              non-SLP stmts we choke on as needing non-SLP handling.  */
+           auto_vec<stmt_vec_info, 1> tem;
+           tem.quick_push (stmt_info);
+           if (!bst_map->get (tem))
+             {
+               gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info)
+                           == vect_induction_def);
+               stmts.create (1);
+               stmts.quick_push (stmt_info);
+               vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
+                                        stmts, roots, remain,
+                                        max_tree_size, &limit,
+                                        bst_map, NULL, force_single_lane);
+             }
          }
     }
 
-- 
2.43.0

Reply via email to