The following avoids creating duplicate stmts for invariant loads; the duplicates were necessary when the vector stmts were in a linked list. It also fixes SLP support, which didn't correctly create the appropriate number of copies.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed. * tree-vect-stmts.cc (vectorizable_load): Avoid useless copies of VMAT_INVARIANT vectorized stmts, fix SLP support. --- gcc/tree-vect-stmts.cc | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 1b160cecfce..b1b08238dc3 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -9612,27 +9612,26 @@ vectorizable_load (vec_info *vinfo, gimple_set_vuse (new_stmt, vuse); gsi_insert_on_edge_immediate (pe, new_stmt); } - /* These copies are all equivalent, but currently the representation - requires a separate STMT_VINFO_VEC_STMT for each one. */ - gimple_stmt_iterator gsi2 = *gsi; - gsi_next (&gsi2); - for (j = 0; j < ncopies; j++) + /* These copies are all equivalent. */ + if (hoist_p) + new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest, + vectype, NULL); + else { - if (hoist_p) - new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest, - vectype, NULL); - else - new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest, - vectype, &gsi2); - gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp); - if (slp) - SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); - else - { - if (j == 0) - *vec_stmt = new_stmt; - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); - } + gimple_stmt_iterator gsi2 = *gsi; + gsi_next (&gsi2); + new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest, + vectype, &gsi2); + } + gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp); + if (slp) + for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j) + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); + else + { + for (j = 0; j < ncopies; ++j) + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); + *vec_stmt = new_stmt; } return true; } -- 2.35.3