On Mon, Jul 30, 2018 at 1:41 PM Richard Sandiford <richard.sandif...@arm.com> wrote: > > Invariant loads were handled as a variation on the code for contiguous > loads. We detected whether they were invariant or not as a byproduct of > creating the vector pointer ivs: vect_create_data_ref_ptr passed back an > inv_p to say whether the pointer was invariant. > > But vectorised invariant loads just keep the original scalar load, > so this meant that detecting invariant loads had the side-effect of > creating an unwanted vector pointer iv. The placement of the code > also meant that we'd create a vector load and then not use the result. > In principle this is wrong code, since there's no guarantee that there's > a vector's worth of accessible data at that address, but we rely on DCE > to get rid of the load before any harm is done. > > E.g., for an invariant load in an inner loop (which seems like the more > common use case for this code), we'd create: > > vectp_a.6_52 = &a + 4; > > # vectp_a.5_53 = PHI <vectp_a.5_54(9), vectp_a.6_52(2)> > > # vectp_a.5_55 = PHI <vectp_a.5_53(3), vectp_a.5_56(10)> > > vect_next_a_11.7_57 = MEM[(int *)vectp_a.5_55]; > next_a_11 = a[_1]; > vect_cst__58 = {next_a_11, next_a_11, next_a_11, next_a_11}; > > vectp_a.5_56 = vectp_a.5_55 + 4; > > vectp_a.5_54 = vectp_a.5_53 + 0; > > whereas all we want is: > > next_a_11 = a[_1]; > vect_cst__58 = {next_a_11, next_a_11, next_a_11, next_a_11}; > > This patch moves the handling to its own block and makes > vect_create_data_ref_ptr assert (when creating a full iv) that the > address isn't invariant. > > The ncopies handling is unfortunate, but a preexisting issue. > Richi's suggestion of using a vector of vector statements would > let us reuse one statement for all copies.
OK. Richard. > > 2018-07-30 Richard Sandiford <richard.sandif...@arm.com> > > gcc/ > * tree-vectorizer.h (vect_create_data_ref_ptr): Remove inv_p > parameter. > * tree-vect-data-refs.c (vect_create_data_ref_ptr): Likewise. > When creating an iv, assert that the step is not known to be zero. > (vect_setup_realignment): Update call accordingly. > * tree-vect-stmts.c (vectorizable_store): Likewise. > (vectorizable_load): Likewise. Handle VMAT_INVARIANT separately. > > Index: gcc/tree-vectorizer.h > =================================================================== > *** gcc/tree-vectorizer.h 2018-07-30 12:32:29.586506669 +0100 > --- gcc/tree-vectorizer.h 2018-07-30 12:40:13.000000000 +0100 > *************** extern bool vect_analyze_data_refs (vec_ > *** 1527,1533 **** > extern void vect_record_base_alignments (vec_info *); > extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, struct loop *, > tree, > tree *, gimple_stmt_iterator *, > ! gimple **, bool, bool *, > tree = NULL_TREE, tree = NULL_TREE); > extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *, > stmt_vec_info, tree); > --- 1527,1533 ---- > extern void vect_record_base_alignments (vec_info *); > extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, struct loop *, > tree, > tree *, gimple_stmt_iterator *, > ! gimple **, bool, > tree = NULL_TREE, tree = NULL_TREE); > extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *, > stmt_vec_info, tree); > Index: gcc/tree-vect-data-refs.c > =================================================================== > *** gcc/tree-vect-data-refs.c 2018-07-30 12:32:26.214536374 +0100 > --- gcc/tree-vect-data-refs.c 2018-07-30 12:32:32.546480596 +0100 > *************** vect_create_addr_base_for_vector_ref (st > *** 4674,4689 **** > > Return the increment stmt that updates the pointer in PTR_INCR. > > ! 3. Set INV_P to true if the access pattern of the data reference in the > ! vectorized loop is invariant. Set it to false otherwise. > ! > ! 4. Return the pointer. */ > > tree > vect_create_data_ref_ptr (stmt_vec_info stmt_info, tree aggr_type, > struct loop *at_loop, tree offset, > tree *initial_address, gimple_stmt_iterator *gsi, > ! gimple **ptr_incr, bool only_init, bool *inv_p, > tree byte_offset, tree iv_step) > { > const char *base_name; > --- 4674,4686 ---- > > Return the increment stmt that updates the pointer in PTR_INCR. > > ! 3. Return the pointer. */ > > tree > vect_create_data_ref_ptr (stmt_vec_info stmt_info, tree aggr_type, > struct loop *at_loop, tree offset, > tree *initial_address, gimple_stmt_iterator *gsi, > ! gimple **ptr_incr, bool only_init, > tree byte_offset, tree iv_step) > { > const char *base_name; > *************** vect_create_data_ref_ptr (stmt_vec_info > *** 4705,4711 **** > bool insert_after; > tree indx_before_incr, indx_after_incr; > gimple *incr; > - tree step; > bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); > > gcc_assert (iv_step != NULL_TREE > --- 4702,4707 ---- > *************** vect_create_data_ref_ptr (stmt_vec_info > *** 4726,4739 **** > *ptr_incr = NULL; > } > > - /* Check the step (evolution) of the load in LOOP, and record > - whether it's invariant. */ > - step = vect_dr_behavior (dr_info)->step; > - if (integer_zerop (step)) > - *inv_p = true; > - else > - *inv_p = false; > - > /* Create an expression for the first address accessed by this load > in LOOP. */ > base_name = get_name (DR_BASE_ADDRESS (dr)); > --- 4722,4727 ---- > *************** vect_create_data_ref_ptr (stmt_vec_info > *** 4849,4863 **** > aptr = aggr_ptr_init; > else > { > if (iv_step == NULL_TREE) > { > ! /* The step of the aggregate pointer is the type size. */ > iv_step = TYPE_SIZE_UNIT (aggr_type); > ! /* One exception to the above is when the scalar step of the load in > ! LOOP is zero. In this case the step here is also zero. */ > ! if (*inv_p) > ! iv_step = size_zero_node; > ! else if (tree_int_cst_sgn (step) == -1) > iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step); > } > > --- 4837,4853 ---- > aptr = aggr_ptr_init; > else > { > + /* Accesses to invariant addresses should be handled specially > + by the caller. */ > + tree step = vect_dr_behavior (dr_info)->step; > + gcc_assert (!integer_zerop (step)); > + > if (iv_step == NULL_TREE) > { > ! /* The step of the aggregate pointer is the type size, > ! negated for downward accesses. */ > iv_step = TYPE_SIZE_UNIT (aggr_type); > ! if (tree_int_cst_sgn (step) == -1) > iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step); > } > > *************** vect_setup_realignment (stmt_vec_info st > *** 5462,5468 **** > gphi *phi_stmt; > tree msq = NULL_TREE; > gimple_seq stmts = NULL; > - bool inv_p; > bool compute_in_loop = false; > bool nested_in_vect_loop = false; > struct loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father; > --- 5452,5457 ---- > *************** vect_setup_realignment (stmt_vec_info st > *** 5556,5562 **** > vec_dest = vect_create_destination_var (scalar_dest, vectype); > ptr = vect_create_data_ref_ptr (stmt_info, vectype, > loop_for_initial_load, NULL_TREE, > ! &init_addr, NULL, &inc, true, &inv_p); > if (TREE_CODE (ptr) == SSA_NAME) > new_temp = copy_ssa_name (ptr); > else > --- 5545,5551 ---- > vec_dest = vect_create_destination_var (scalar_dest, vectype); > ptr = vect_create_data_ref_ptr (stmt_info, vectype, > loop_for_initial_load, NULL_TREE, > ! &init_addr, NULL, &inc, true); > if (TREE_CODE (ptr) == SSA_NAME) > new_temp = copy_ssa_name (ptr); > else > Index: gcc/tree-vect-stmts.c > =================================================================== > *** gcc/tree-vect-stmts.c 2018-07-30 12:32:29.586506669 +0100 > --- gcc/tree-vect-stmts.c 2018-07-30 12:40:14.000000000 +0100 > *************** vectorizable_store (stmt_vec_info stmt_i > *** 6254,6260 **** > unsigned int group_size, i; > vec<tree> oprnds = vNULL; > vec<tree> result_chain = vNULL; > - bool inv_p; > tree offset = NULL_TREE; > vec<tree> vec_oprnds = vNULL; > bool slp = (slp_node != NULL); > --- 6254,6259 ---- > *************** vectorizable_store (stmt_vec_info stmt_i > *** 7018,7039 **** > { > dataref_ptr = unshare_expr (DR_BASE_ADDRESS > (first_dr_info->dr)); > dataref_offset = build_int_cst (ref_type, 0); > - inv_p = false; > } > else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) > ! { > ! vect_get_gather_scatter_ops (loop, stmt_info, &gs_info, > ! &dataref_ptr, &vec_offset); > ! inv_p = false; > ! } > else > dataref_ptr > = vect_create_data_ref_ptr (first_stmt_info, aggr_type, > simd_lane_access_p ? loop : NULL, > offset, &dummy, gsi, &ptr_incr, > ! simd_lane_access_p, &inv_p, > ! NULL_TREE, bump); > ! gcc_assert (bb_vinfo || !inv_p); > } > else > { > --- 7017,7032 ---- > { > dataref_ptr = unshare_expr (DR_BASE_ADDRESS > (first_dr_info->dr)); > dataref_offset = build_int_cst (ref_type, 0); > } > else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) > ! vect_get_gather_scatter_ops (loop, stmt_info, &gs_info, > ! &dataref_ptr, &vec_offset); > else > dataref_ptr > = vect_create_data_ref_ptr (first_stmt_info, aggr_type, > simd_lane_access_p ? loop : NULL, > offset, &dummy, gsi, &ptr_incr, > ! simd_lane_access_p, NULL_TREE, > bump); > } > else > { > *************** vectorizable_load (stmt_vec_info stmt_in > *** 7419,7425 **** > bool grouped_load = false; > stmt_vec_info first_stmt_info; > stmt_vec_info first_stmt_info_for_drptr = NULL; > - bool inv_p; > bool compute_in_loop = false; > struct loop *at_loop; > int vec_num; > --- 7412,7417 ---- > *************** vectorizable_load (stmt_vec_info stmt_in > *** 7669,7674 **** > --- 7661,7723 ---- > return true; > } > > + if (memory_access_type == VMAT_INVARIANT) > + { > + gcc_assert (!grouped_load && !mask && !bb_vinfo); > + /* If we have versioned for aliasing or the loop doesn't > + have any data dependencies that would preclude this, > + then we are sure this is a loop invariant load and > + thus we can insert it on the preheader edge. */ > + bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) > + && !nested_in_vect_loop > + && hoist_defs_of_uses (stmt_info, loop)); > + if (hoist_p) > + { > + gassign *stmt = as_a <gassign *> (stmt_info->stmt); > + if (dump_enabled_p ()) > + { > + dump_printf_loc (MSG_NOTE, vect_location, > + "hoisting out of the vectorized loop: "); > + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); > + } > + scalar_dest = copy_ssa_name (scalar_dest); > + tree rhs = unshare_expr (gimple_assign_rhs1 (stmt)); > + gsi_insert_on_edge_immediate > + (loop_preheader_edge (loop), > + gimple_build_assign (scalar_dest, rhs)); > + } > + /* These copies are all equivalent, but currently the representation > + requires a separate STMT_VINFO_VEC_STMT for each one. */ > + prev_stmt_info = NULL; > + gimple_stmt_iterator gsi2 = *gsi; > + gsi_next (&gsi2); > + for (j = 0; j < ncopies; j++) > + { > + stmt_vec_info new_stmt_info; > + if (hoist_p) > + { > + new_temp = vect_init_vector (stmt_info, scalar_dest, > + vectype, NULL); > + gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp); > + new_stmt_info = vinfo->add_stmt (new_stmt); > + } > + else > + { > + new_temp = vect_init_vector (stmt_info, scalar_dest, > + vectype, &gsi2); > + new_stmt_info = vinfo->lookup_def (new_temp); > + } > + if (slp) > + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); > + else if (j == 0) > + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; > + else > + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; > + prev_stmt_info = new_stmt_info; > + } > + return true; > + } > + > if (memory_access_type == VMAT_ELEMENTWISE > || memory_access_type == VMAT_STRIDED_SLP) > { > *************** vectorizable_load (stmt_vec_info stmt_in > *** 8177,8183 **** > { > dataref_ptr = unshare_expr (DR_BASE_ADDRESS > (first_dr_info->dr)); > dataref_offset = build_int_cst (ref_type, 0); > - inv_p = false; > } > else if (first_stmt_info_for_drptr > && first_stmt_info != first_stmt_info_for_drptr) > --- 8226,8231 ---- > *************** vectorizable_load (stmt_vec_info stmt_in > *** 8186,8192 **** > = vect_create_data_ref_ptr (first_stmt_info_for_drptr, > aggr_type, at_loop, offset, > &dummy, > gsi, &ptr_incr, > simd_lane_access_p, > ! &inv_p, byte_offset, bump); > /* Adjust the pointer by the difference to first_stmt. */ > data_reference_p ptrdr > = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr); > --- 8234,8240 ---- > = vect_create_data_ref_ptr (first_stmt_info_for_drptr, > aggr_type, at_loop, offset, > &dummy, > gsi, &ptr_incr, > simd_lane_access_p, > ! byte_offset, bump); > /* Adjust the pointer by the difference to first_stmt. */ > data_reference_p ptrdr > = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr); > *************** vectorizable_load (stmt_vec_info stmt_in > *** 8199,8214 **** > stmt_info, diff); > } > else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) > ! { > ! vect_get_gather_scatter_ops (loop, stmt_info, &gs_info, > ! &dataref_ptr, &vec_offset); > ! inv_p = false; > ! } > else > dataref_ptr > = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop, > offset, &dummy, gsi, &ptr_incr, > ! simd_lane_access_p, &inv_p, > byte_offset, bump); > if (mask) > vec_mask = vect_get_vec_def_for_operand (mask, stmt_info, > --- 8247,8259 ---- > stmt_info, diff); > } > else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) > ! vect_get_gather_scatter_ops (loop, stmt_info, &gs_info, > ! &dataref_ptr, &vec_offset); > else > dataref_ptr > = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop, > offset, &dummy, gsi, &ptr_incr, > ! simd_lane_access_p, > byte_offset, bump); > if (mask) > vec_mask = vect_get_vec_def_for_operand (mask, stmt_info, > *************** vectorizable_load (stmt_vec_info stmt_in > *** 8492,8538 **** > } > } > > - /* 4. Handle invariant-load. */ > - if (inv_p && !bb_vinfo) > - { > - gcc_assert (!grouped_load); > - /* If we have versioned for aliasing or the loop doesn't > - have any data dependencies that would preclude this, > - then we are sure this is a loop invariant load and > - thus we can insert it on the preheader edge. */ > - if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) > - && !nested_in_vect_loop > - && hoist_defs_of_uses (stmt_info, loop)) > - { > - gassign *stmt = as_a <gassign *> (stmt_info->stmt); > - if (dump_enabled_p ()) > - { > - dump_printf_loc (MSG_NOTE, vect_location, > - "hoisting out of the vectorized " > - "loop: "); > - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); > - } > - tree tem = copy_ssa_name (scalar_dest); > - gsi_insert_on_edge_immediate > - (loop_preheader_edge (loop), > - gimple_build_assign (tem, > - unshare_expr > - (gimple_assign_rhs1 (stmt)))); > - new_temp = vect_init_vector (stmt_info, tem, > - vectype, NULL); > - new_stmt = SSA_NAME_DEF_STMT (new_temp); > - new_stmt_info = vinfo->add_stmt (new_stmt); > - } > - else > - { > - gimple_stmt_iterator gsi2 = *gsi; > - gsi_next (&gsi2); > - new_temp = vect_init_vector (stmt_info, scalar_dest, > - vectype, &gsi2); > - new_stmt_info = vinfo->lookup_def (new_temp); > - } > - } > - > if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) > { > tree perm_mask = perm_mask_for_reverse (vectype); > --- 8537,8542 ----