502.gcc_r, when built with -fprofile-generate, exposes an SLP discovery issue where an IV forced live due to an early break is not properly discovered if its latch def is part of a different IV's SSA cycle. To mitigate this, we have to make sure to create an SLP instance for the original IV. Ideally we'd handle all vect_induction_def the same way, but this is left for the next stage1.
Bootstrap and regtest running on x86_64-unknown-linux-gnu. PR tree-optimization/118852 * tree-vect-slp.cc (vect_analyze_slp): For early-break forced-live IVs make sure we create an appropriate entry into the SLP graph. * gcc.dg/vect/pr118852.c: New testcase. --- gcc/testsuite/gcc.dg/vect/pr118852.c | 109 +++++++++++++++++++++++++++ gcc/tree-vect-slp.cc | 24 +++++- 2 files changed, 130 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr118852.c diff --git a/gcc/testsuite/gcc.dg/vect/pr118852.c b/gcc/testsuite/gcc.dg/vect/pr118852.c new file mode 100644 index 00000000000..71bfb450f7c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr118852.c @@ -0,0 +1,109 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-require-profiling "-fprofile-generate" } */ +/* { dg-additional-options "-fallow-store-data-races -fprofile-arcs" } */ +/* { dg-additional-options "-msse4.2 -mprefer-vector-width=128" { target { x86_64-*-* i?86-*-* } } } */ + +#include "tree-vect.h" + +typedef unsigned int hashval_t; +struct htab { + void ** entries; + unsigned long size; +}; +typedef struct htab *htab_t; +unsigned long htab_size (htab_t h) +{ + return h->size; +} +typedef struct +{ + htab_t htab; + void * *slot; + void * *limit; +} htab_iterator; + +static inline void * +first_htab_element (htab_iterator *hti, htab_t table) +{ + hti->htab = table; + hti->slot = table->entries; + hti->limit = hti->slot + htab_size (table); + do + { + void * x = *(hti->slot); + if (x != ((void *) 0) && x != ((void *) 1)) + break; + } while (++(hti->slot) < hti->limit); + + if (hti->slot < hti->limit) + return *(hti->slot); + return + ((void *)0) + ; +} + +static inline unsigned char +end_htab_p (const htab_iterator *hti) +{ + if (hti->slot >= hti->limit) + return 1; + return 0; +} + +static inline void * +next_htab_element (htab_iterator *hti) +{ + while (++(hti->slot) < hti->limit) + { + void * x = *(hti->slot); + if (x != ((void *) 0) && x != ((void *) 1)) + return x; + } + return + 
((void *)0) + ; +} + +typedef unsigned long vn_nary_op_t; + +typedef struct vn_tables_s +{ + htab_t nary; +} *vn_tables_t; + +vn_tables_t valid_info; + +void __attribute__((noipa)) +announce (vn_nary_op_t p) +{ + static vn_nary_op_t prev = 0; + if (prev == 0x70904f0 && p != 0x70904c0) + __builtin_abort (); + prev = p; +} + +void __attribute__((noipa)) +set_hashtable_value_ids_1 (void) +{ + htab_iterator hi; + vn_nary_op_t vno; + for (vno = (vn_nary_op_t) first_htab_element (&(hi), (valid_info->nary)); !end_htab_p (&(hi)); vno = (vn_nary_op_t) next_htab_element (&(hi))) + announce (vno); +} + +int main() +{ + if (sizeof (void *) != sizeof (vn_nary_op_t)) + return 0; + check_vect (); + valid_info = __builtin_malloc (sizeof (struct vn_tables_s)); + valid_info->nary = __builtin_malloc (sizeof (struct htab)); + valid_info->nary->entries = __builtin_malloc (sizeof (void *) * 32); + valid_info->nary->size = 31; + static vn_nary_op_t x[] = { 0x70905e0, 0x0, 0x0, 0x7090610, 0x7090550, 0x7090400, 0x70903a0, 0x0, + 0x0, 0x70904f0, 0x0, 0x0, 0x0, 0x0, 0x70904c0, 0x7090520, 0x7090460, + 0x7090490, 0x7090430, 0x0, 0x0, 0x0, 0x7090640, 0x0, 0x0, 0x70903d0, 0x0, + 0x7090580, 0x0, 0x0, 0x70905b0}; + __builtin_memcpy (valid_info->nary->entries, x, sizeof (void *) * 31); + set_hashtable_value_ids_1 (); +} diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 8ed746ea5a9..9e09f8e980b 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -5031,15 +5031,33 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size, vec<stmt_vec_info> stmts; vec<stmt_vec_info> roots = vNULL; vec<tree> remain = vNULL; - gphi *lc_phi = as_a<gphi *> (STMT_VINFO_STMT (stmt_info)); - tree def = gimple_phi_arg_def_from_edge (lc_phi, latch_e); - stmt_vec_info lc_info = loop_vinfo->lookup_def (def); + gphi *phi = as_a<gphi *> (STMT_VINFO_STMT (stmt_info)); stmts.create (1); + tree def = gimple_phi_arg_def_from_edge (phi, latch_e); + stmt_vec_info lc_info = loop_vinfo->lookup_def (def); 
stmts.quick_push (vect_stmt_to_vectorize (lc_info)); vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group, stmts, roots, remain, max_tree_size, &limit, bst_map, NULL, force_single_lane); + /* When the latch def is from a different cycle this can only + be an induction. Build a simple instance for this. + ??? We should be able to start discovery from the PHI + for all inductions, but then there will be stray + non-SLP stmts we choke on as needing non-SLP handling. */ + auto_vec<stmt_vec_info, 1> tem; + tem.quick_push (stmt_info); + if (!bst_map->get (tem)) + { + gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) + == vect_induction_def); + stmts.create (1); + stmts.quick_push (stmt_info); + vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group, + stmts, roots, remain, + max_tree_size, &limit, + bst_map, NULL, force_single_lane); + } } } -- 2.43.0