Committed, thanks Richard.

Pan
-----Original Message-----
From: Gcc-patches <gcc-patches-bounces+pan2.li=intel....@gcc.gnu.org> On Behalf Of Richard Biener via Gcc-patches
Sent: Tuesday, August 15, 2023 8:35 PM
To: Juzhe-Zhong <juzhe.zh...@rivai.ai>
Cc: gcc-patches@gcc.gnu.org; richard.sandif...@arm.com
Subject: Re: [PATCH V2] VECT: Apply MASK_LEN_{LOAD_LANES, STORE_LANES} into vectorizer

On Tue, 15 Aug 2023, Juzhe-Zhong wrote:

> Hi, Richard and Richi.
>
> This patch adds MASK_LEN_{LOAD_LANES,STORE_LANES} support to the
> vectorizer.
>
> Consider this simple case:
>
> void __attribute__ ((noinline, noclone))
> foo (int *__restrict a, int *__restrict b, int *__restrict c,
>      int *__restrict d, int *__restrict e, int *__restrict f,
>      int *__restrict g, int *__restrict h, int *__restrict j, int n)
> {
>   for (int i = 0; i < n; ++i)
>     {
>       a[i] = j[i * 8];
>       b[i] = j[i * 8 + 1];
>       c[i] = j[i * 8 + 2];
>       d[i] = j[i * 8 + 3];
>       e[i] = j[i * 8 + 4];
>       f[i] = j[i * 8 + 5];
>       g[i] = j[i * 8 + 6];
>       h[i] = j[i * 8 + 7];
>     }
> }
>
> RVV Gimple IR:
>
>   _79 = .SELECT_VL (ivtmp_81, POLY_INT_CST [4, 4]);
>   ivtmp_125 = _79 * 32;
>   vect_array.8 = .MASK_LEN_LOAD_LANES (vectp_j.6_124, 32B, { -1, ... }, _79, 0);
>   vect__8.9_122 = vect_array.8[0];
>   vect__8.10_121 = vect_array.8[1];
>   vect__8.11_120 = vect_array.8[2];
>   vect__8.12_119 = vect_array.8[3];
>   vect__8.13_118 = vect_array.8[4];
>   vect__8.14_117 = vect_array.8[5];
>   vect__8.15_116 = vect_array.8[6];
>   vect__8.16_115 = vect_array.8[7];
>   vect_array.8 ={v} {CLOBBER};
>   ivtmp_114 = _79 * 4;
>   .MASK_LEN_STORE (vectp_a.17_113, 32B, { -1, ... }, _79, 0, vect__8.9_122);
>   .MASK_LEN_STORE (vectp_b.19_109, 32B, { -1, ... }, _79, 0, vect__8.10_121);
>   .MASK_LEN_STORE (vectp_c.21_105, 32B, { -1, ... }, _79, 0, vect__8.11_120);
>   .MASK_LEN_STORE (vectp_d.23_101, 32B, { -1, ... }, _79, 0, vect__8.12_119);
>   .MASK_LEN_STORE (vectp_e.25_97, 32B, { -1, ... }, _79, 0, vect__8.13_118);
>   .MASK_LEN_STORE (vectp_f.27_93, 32B, { -1, ... }, _79, 0, vect__8.14_117);
>   .MASK_LEN_STORE (vectp_g.29_89, 32B, { -1, ... }, _79, 0, vect__8.15_116);
>   .MASK_LEN_STORE (vectp_h.31_85, 32B, { -1, ... }, _79, 0, vect__8.16_115);
>
> ASM:
>
> foo:
>         lw      t4,8(sp)
>         ld      t5,0(sp)
>         ble     t4,zero,.L5
> .L3:
>         vsetvli t1,t4,e8,mf4,ta,ma
>         vlseg8e32.v     v8,(t5)
>         slli    t3,t1,2
>         slli    t6,t1,5
>         vse32.v v8,0(a0)
>         vse32.v v9,0(a1)
>         vse32.v v10,0(a2)
>         vse32.v v11,0(a3)
>         vse32.v v12,0(a4)
>         vse32.v v13,0(a5)
>         vse32.v v14,0(a6)
>         vse32.v v15,0(a7)
>         sub     t4,t4,t1
>         add     t5,t5,t6
>         add     a0,a0,t3
>         add     a1,a1,t3
>         add     a2,a2,t3
>         add     a3,a3,t3
>         add     a4,a4,t3
>         add     a5,a5,t3
>         add     a6,a6,t3
>         add     a7,a7,t3
>         bne     t4,zero,.L3
> .L5:
>         ret
>
> The details of the approach:
>
> Step 1 - Modify the LANES LOAD/STORE support functions
> (vect_load_lanes_supported/vect_store_lanes_supported):
>
> +/* Return FN if vec_{masked_,mask_len,}load_lanes is available for COUNT
> +   vectors of type VECTYPE.  MASKED_P says whether the masked form is
> +   needed.  */
>
> -bool
> +internal_fn
>  vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
>                             bool masked_p)
>  {
> -  if (masked_p)
> -    return vect_lanes_optab_supported_p ("vec_mask_load_lanes",
> -                                         vec_mask_load_lanes_optab,
> -                                         vectype, count);
> +  if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
> +                                    vec_mask_len_load_lanes_optab,
> +                                    vectype, count))
> +    return IFN_MASK_LEN_LOAD_LANES;
> +  else if (masked_p)
> +    {
> +      if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
> +                                        vec_mask_load_lanes_optab,
> +                                        vectype, count))
> +        return IFN_MASK_LOAD_LANES;
> +    }
>    else
> -    return vect_lanes_optab_supported_p ("vec_load_lanes",
> -                                         vec_load_lanes_optab,
> -                                         vectype, count);
> +    {
> +      if (vect_lanes_optab_supported_p ("vec_load_lanes",
> +                                        vec_load_lanes_optab,
> +                                        vectype, count))
> +        return IFN_LOAD_LANES;
> +    }
> +  return IFN_LAST;
>  }
>
> Instead of returning TRUE or FALSE depending on whether the target supports
> the LANES LOAD/STORE, I change it to return the internal_fn of the LANES
> LOAD/STORE that the target supports; if the target doesn't support any LANES
> LOAD/STORE optab, it returns IFN_LAST.
>
> Step 2 - Compute the IFN for LANES LOAD/STORE (only computed once):
>
>   if (!STMT_VINFO_STRIDED_P (first_stmt_info)
>       && (can_overrun_p || !would_overrun_p)
>       && compare_step_with_zero (vinfo, stmt_info) > 0)
>     {
>       /* First cope with the degenerate case of a single-element
>          vector.  */
>       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
>         ;
>
>       else
>         {
>           /* Otherwise try using LOAD/STORE_LANES.  */
>           *lanes_ifn
>             = vls_type == VLS_LOAD
>                 ? vect_load_lanes_supported (vectype, group_size, masked_p)
>                 : vect_store_lanes_supported (vectype, group_size,
>                                               masked_p);
>           if (*lanes_ifn != IFN_LAST)
>             {
>               *memory_access_type = VMAT_LOAD_STORE_LANES;
>               overrun_p = would_overrun_p;
>             }
>
>           /* If that fails, try using permuting loads.  */
>           else if (vls_type == VLS_LOAD
>                      ? vect_grouped_load_supported (vectype,
>                                                     single_element_p,
>                                                     group_size)
>                      : vect_grouped_store_supported (vectype, group_size))
>             {
>               *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
>               overrun_p = would_overrun_p;
>             }
>         }
>     }
>
> Step 3 - Build the MASK_LEN_{LOAD_LANES,STORE_LANES} Gimple IR:
>
> +      if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
> +        {
> +          if (loop_lens)
> +            final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> +                                           ncopies, vectype, j, 1);
> +          else
> +            final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> +          signed char biasval
> +            = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> +          bias = build_int_cst (intQI_type_node, biasval);
> +          if (!final_mask)
> +            {
> +              mask_vectype = truth_type_for (vectype);
> +              final_mask = build_minus_one_cst (mask_vectype);
> +            }
> +        }
> +
>        gcall *call;
> -      if (final_mask)
> +      if (final_len && final_mask)
> +        {
> +          /* Emit:
> +               MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
> +                                     LEN, BIAS, VEC_ARRAY).  */
> +          unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
> +          tree alias_ptr = build_int_cst (ref_type, align);
> +          call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
> +                                             dataref_ptr, alias_ptr,
> +                                             final_mask, final_len, bias,
> +                                             vec_array);
> +        }
> +      else if (final_mask)
>
> The LEN and MASK flow is the same as for the other MASK_LEN_* loads/stores.

OK.

Thanks,
Richard.

> gcc/ChangeLog:
>
>         * internal-fn.cc (internal_load_fn_p): Apply
>         MASK_LEN_{LOAD_LANES,STORE_LANES} into vectorizer.
>         (internal_store_fn_p): Ditto.
>         (internal_fn_len_index): Ditto.
>         (internal_fn_mask_index): Ditto.
>         (internal_fn_stored_value_index): Ditto.
>         * tree-vect-data-refs.cc (vect_store_lanes_supported): Ditto.
>         (vect_load_lanes_supported): Ditto.
>         * tree-vect-loop.cc: Ditto.
>         * tree-vect-slp.cc (vect_slp_prefer_store_lanes_p): Ditto.
>         * tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto.
>         (get_group_load_store_type): Ditto.
>         (vectorizable_store): Ditto.
>         (vectorizable_load): Ditto.
>         * tree-vectorizer.h (vect_store_lanes_supported): Ditto.
>         (vect_load_lanes_supported): Ditto.
>
> ---
>  gcc/internal-fn.cc         |   7 ++
>  gcc/tree-vect-data-refs.cc |  61 ++++++++++------
>  gcc/tree-vect-loop.cc      |  11 +--
>  gcc/tree-vect-slp.cc       |   2 +-
>  gcc/tree-vect-stmts.cc     | 141 ++++++++++++++++++++++++++++---------
>  gcc/tree-vectorizer.h      |   4 +-
>  6 files changed, 163 insertions(+), 63 deletions(-)
>
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 4f2b20a79e5..cc1ede58799 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -4578,6 +4578,7 @@ internal_load_fn_p (internal_fn fn)
>      case IFN_MASK_LOAD:
>      case IFN_LOAD_LANES:
>      case IFN_MASK_LOAD_LANES:
> +    case IFN_MASK_LEN_LOAD_LANES:
>      case IFN_GATHER_LOAD:
>      case IFN_MASK_GATHER_LOAD:
>      case IFN_MASK_LEN_GATHER_LOAD:
> @@ -4600,6 +4601,7 @@ internal_store_fn_p (internal_fn fn)
>      case IFN_MASK_STORE:
>      case IFN_STORE_LANES:
>      case IFN_MASK_STORE_LANES:
> +    case IFN_MASK_LEN_STORE_LANES:
>      case IFN_SCATTER_STORE:
>      case IFN_MASK_SCATTER_STORE:
>      case IFN_MASK_LEN_SCATTER_STORE:
> @@ -4672,6 +4674,8 @@ internal_fn_len_index (internal_fn fn)
>      case IFN_COND_LEN_NEG:
>      case IFN_MASK_LEN_LOAD:
>      case IFN_MASK_LEN_STORE:
> +    case IFN_MASK_LEN_LOAD_LANES:
> +    case IFN_MASK_LEN_STORE_LANES:
>        return 3;
>
>      default:
> @@ -4689,8 +4693,10 @@ internal_fn_mask_index (internal_fn fn)
>      {
>      case IFN_MASK_LOAD:
>      case IFN_MASK_LOAD_LANES:
> +    case IFN_MASK_LEN_LOAD_LANES:
>      case IFN_MASK_STORE:
>      case IFN_MASK_STORE_LANES:
> +    case IFN_MASK_LEN_STORE_LANES:
>      case IFN_MASK_LEN_LOAD:
>      case IFN_MASK_LEN_STORE:
>        return 2;
> @@ -4726,6 +4732,7 @@ internal_fn_stored_value_index (internal_fn fn)
>        return 4;
>
>      case IFN_MASK_LEN_STORE:
> +    case IFN_MASK_LEN_STORE_LANES:
>        return 5;
>
>      default:
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index a3570c45b52..3e9a284666c 100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -5438,22 +5438,31 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
>    return false;
>  }
>
> +/* Return FN if vec_{mask_,mask_len_}store_lanes is available for COUNT
> +   vectors of type VECTYPE.  MASKED_P says whether the masked form is
> +   needed.  */
>
> -/* Return TRUE if vec_{mask_}store_lanes is available for COUNT vectors of
> -   type VECTYPE.  MASKED_P says whether the masked form is needed.  */
> -
> -bool
> +internal_fn
>  vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
>                              bool masked_p)
>  {
> -  if (masked_p)
> -    return vect_lanes_optab_supported_p ("vec_mask_store_lanes",
> -                                         vec_mask_store_lanes_optab,
> -                                         vectype, count);
> +  if (vect_lanes_optab_supported_p ("vec_mask_len_store_lanes",
> +                                    vec_mask_len_store_lanes_optab, vectype,
> +                                    count))
> +    return IFN_MASK_LEN_STORE_LANES;
> +  else if (masked_p)
> +    {
> +      if (vect_lanes_optab_supported_p ("vec_mask_store_lanes",
> +                                        vec_mask_store_lanes_optab, vectype,
> +                                        count))
> +        return IFN_MASK_STORE_LANES;
> +    }
>    else
> -    return vect_lanes_optab_supported_p ("vec_store_lanes",
> -                                         vec_store_lanes_optab,
> -                                         vectype, count);
> +    {
> +      if (vect_lanes_optab_supported_p ("vec_store_lanes",
> +                                        vec_store_lanes_optab, vectype, count))
> +        return IFN_STORE_LANES;
> +    }
> +  return IFN_LAST;
>  }
>
>
> @@ -6056,21 +6065,31 @@ vect_grouped_load_supported (tree vectype, bool single_element_p,
>    return false;
>  }
>
> -/* Return TRUE if vec_{masked_}load_lanes is available for COUNT vectors of
> -   type VECTYPE.  MASKED_P says whether the masked form is needed.  */
> +/* Return FN if vec_{masked_,mask_len_}load_lanes is available for COUNT
> +   vectors of type VECTYPE.  MASKED_P says whether the masked form is
> +   needed.  */
>
> -bool
> +internal_fn
>  vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
>                             bool masked_p)
>  {
> -  if (masked_p)
> -    return vect_lanes_optab_supported_p ("vec_mask_load_lanes",
> -                                         vec_mask_load_lanes_optab,
> -                                         vectype, count);
> +  if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
> +                                    vec_mask_len_load_lanes_optab, vectype,
> +                                    count))
> +    return IFN_MASK_LEN_LOAD_LANES;
> +  else if (masked_p)
> +    {
> +      if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
> +                                        vec_mask_load_lanes_optab, vectype,
> +                                        count))
> +        return IFN_MASK_LOAD_LANES;
> +    }
>    else
> -    return vect_lanes_optab_supported_p ("vec_load_lanes",
> -                                         vec_load_lanes_optab,
> -                                         vectype, count);
> +    {
> +      if (vect_lanes_optab_supported_p ("vec_load_lanes", vec_load_lanes_optab,
> +                                        vectype, count))
> +        return IFN_LOAD_LANES;
> +    }
> +  return IFN_LAST;
>  }
>
>  /* Function vect_permute_load_chain.
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index bc3063c3615..1fcd8d07ea1 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -2839,7 +2839,8 @@ start_over:
>          instructions record it and move on to the next instance.  */
>        if (loads_permuted
>            && SLP_INSTANCE_KIND (instance) == slp_inst_kind_store
> -          && vect_store_lanes_supported (vectype, group_size, false))
> +          && vect_store_lanes_supported (vectype, group_size, false)
> +               != IFN_LAST)
>          {
>            FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
>              {
> @@ -2848,9 +2849,9 @@ start_over:
>                /* Use SLP for strided accesses (or if we can't
>                   load-lanes).  */
>                if (STMT_VINFO_STRIDED_P (stmt_vinfo)
> -                  || ! vect_load_lanes_supported
> +                  || vect_load_lanes_supported
>                          (STMT_VINFO_VECTYPE (stmt_vinfo),
> -                         DR_GROUP_SIZE (stmt_vinfo), false))
> +                         DR_GROUP_SIZE (stmt_vinfo), false) == IFN_LAST)
>                  break;
>              }
>
> @@ -3153,7 +3154,7 @@ again:
>        vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
>        unsigned int size = DR_GROUP_SIZE (vinfo);
>        tree vectype = STMT_VINFO_VECTYPE (vinfo);
> -      if (! vect_store_lanes_supported (vectype, size, false)
> +      if (vect_store_lanes_supported (vectype, size, false) == IFN_LAST
>            && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)
>            && ! vect_grouped_store_supported (vectype, size))
>          return opt_result::failure_at (vinfo->stmt,
> @@ -3165,7 +3166,7 @@ again:
>        bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo);
>        size = DR_GROUP_SIZE (vinfo);
>        vectype = STMT_VINFO_VECTYPE (vinfo);
> -      if (! vect_load_lanes_supported (vectype, size, false)
> +      if (vect_load_lanes_supported (vectype, size, false) == IFN_LAST
>            && ! vect_grouped_load_supported (vectype, single_element_p,
>                                              size))
>          return opt_result::failure_at (vinfo->stmt,
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index cf91b21cf7d..9ad2634762e 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -3094,7 +3094,7 @@ vect_slp_prefer_store_lanes_p (vec_info *vinfo, stmt_vec_info stmt_info,
>    if (multiple_p (group_size - new_group_size, TYPE_VECTOR_SUBPARTS (vectype))
>        || multiple_p (new_group_size, TYPE_VECTOR_SUBPARTS (vectype)))
>      return false;
> -  return vect_store_lanes_supported (vectype, group_size, false);
> +  return vect_store_lanes_supported (vectype, group_size, false) != IFN_LAST;
>  }
>
>  /* Analyze an SLP instance starting from a group of grouped stores.  Call
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 86d033aa60c..cd8e0a76374 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -1610,9 +1610,15 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
>    bool is_load = (vls_type == VLS_LOAD);
>    if (memory_access_type == VMAT_LOAD_STORE_LANES)
>      {
> -      if (is_load
> -          ? !vect_load_lanes_supported (vectype, group_size, true)
> -          : !vect_store_lanes_supported (vectype, group_size, true))
> +      internal_fn ifn
> +        = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
> +                   : vect_store_lanes_supported (vectype, group_size, true));
> +      if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
> +        vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
> +      else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
> +        vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
> +                               scalar_mask);
> +      else
>          {
>            if (dump_enabled_p ())
>              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -1620,10 +1626,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
>                               " the target doesn't have an appropriate"
>                               " load/store-lanes instruction.\n");
>            LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
> -          return;
>          }
> -      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
> -                             scalar_mask);
>        return;
>      }
>
> @@ -2074,7 +2077,8 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>                             poly_int64 *poffset,
>                             dr_alignment_support *alignment_support_scheme,
>                             int *misalignment,
> -                           gather_scatter_info *gs_info)
> +                           gather_scatter_info *gs_info,
> +                           internal_fn *lanes_ifn)
>  {
>    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
>    class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
> @@ -2272,24 +2276,30 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>        if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
>          ;
>
> -      /* Otherwise try using LOAD/STORE_LANES.  */
> -      else if (vls_type == VLS_LOAD
> -               ? vect_load_lanes_supported (vectype, group_size, masked_p)
> -               : vect_store_lanes_supported (vectype, group_size,
> -                                             masked_p))
> +      else
>          {
> -          *memory_access_type = VMAT_LOAD_STORE_LANES;
> -          overrun_p = would_overrun_p;
> -        }
> +          /* Otherwise try using LOAD/STORE_LANES.  */
> +          *lanes_ifn
> +            = vls_type == VLS_LOAD
> +                ? vect_load_lanes_supported (vectype, group_size, masked_p)
> +                : vect_store_lanes_supported (vectype, group_size,
> +                                              masked_p);
> +          if (*lanes_ifn != IFN_LAST)
> +            {
> +              *memory_access_type = VMAT_LOAD_STORE_LANES;
> +              overrun_p = would_overrun_p;
> +            }
>
> -      /* If that fails, try using permuting loads.  */
> -      else if (vls_type == VLS_LOAD
> -               ? vect_grouped_load_supported (vectype, single_element_p,
> -                                              group_size)
> -               : vect_grouped_store_supported (vectype, group_size))
> -        {
> -          *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
> -          overrun_p = would_overrun_p;
> +          /* If that fails, try using permuting loads.  */
> +          else if (vls_type == VLS_LOAD
> +                     ? vect_grouped_load_supported (vectype,
> +                                                    single_element_p,
> +                                                    group_size)
> +                     : vect_grouped_store_supported (vectype, group_size))
> +            {
> +              *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
> +              overrun_p = would_overrun_p;
> +            }
>          }
>      }
>
> @@ -2378,7 +2388,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>                       poly_int64 *poffset,
>                       dr_alignment_support *alignment_support_scheme,
>                       int *misalignment,
> -                     gather_scatter_info *gs_info)
> +                     gather_scatter_info *gs_info,
> +                     internal_fn *lanes_ifn)
>  {
>    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
>    poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
> @@ -2441,7 +2452,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>                                               masked_p,
>                                               vls_type, memory_access_type, poffset,
>                                               alignment_support_scheme,
> -                                             misalignment, gs_info))
> +                                             misalignment, gs_info, lanes_ifn))
>          return false;
>      }
>    else if (STMT_VINFO_STRIDED_P (stmt_info))
> @@ -3087,11 +3098,8 @@ vect_get_loop_variant_data_ptr_increment (
>    loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
>    tree step = vect_dr_behavior (vinfo, dr_info)->step;
>
> -  /* TODO: We don't support gather/scatter or load_lanes/store_lanes for pointer
> -     IVs are updated by variable amount but we will support them in the future.
> -   */
> -  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER
> -              && memory_access_type != VMAT_LOAD_STORE_LANES);
> +  /* gather/scatter never reach here.  */
> +  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
>
>    /* When we support SELECT_VL pattern, we dynamic adjust
>       the memory address by .SELECT_VL result.
> @@ -8094,9 +8102,11 @@ vectorizable_store (vec_info *vinfo,
>    enum dr_alignment_support alignment_support_scheme;
>    int misalignment;
>    poly_int64 poffset;
> +  internal_fn lanes_ifn;
>    if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
>                              ncopies, &memory_access_type, &poffset,
> -                            &alignment_support_scheme, &misalignment, &gs_info))
> +                            &alignment_support_scheme, &misalignment, &gs_info,
> +                            &lanes_ifn))
>      return false;
>
>    if (mask)
> @@ -8885,6 +8895,8 @@ vectorizable_store (vec_info *vinfo,
>             }
>
>           tree final_mask = NULL;
> +         tree final_len = NULL;
> +         tree bias = NULL;
>           if (loop_masks)
>             final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
>                                              ncopies, vectype, j);
> @@ -8892,8 +8904,37 @@ vectorizable_store (vec_info *vinfo,
>             final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
>                                            final_mask, vec_mask, gsi);
>
> +         if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
> +           {
> +             if (loop_lens)
> +               final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> +                                              ncopies, vectype, j, 1);
> +             else
> +               final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> +             signed char biasval
> +               = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> +             bias = build_int_cst (intQI_type_node, biasval);
> +             if (!final_mask)
> +               {
> +                 mask_vectype = truth_type_for (vectype);
> +                 final_mask = build_minus_one_cst (mask_vectype);
> +               }
> +           }
> +
>           gcall *call;
> -         if (final_mask)
> +         if (final_len && final_mask)
> +           {
> +             /* Emit:
> +                  MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
> +                                        LEN, BIAS, VEC_ARRAY).  */
> +             unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
> +             tree alias_ptr = build_int_cst (ref_type, align);
> +             call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
> +                                                dataref_ptr, alias_ptr,
> +                                                final_mask, final_len, bias,
> +                                                vec_array);
> +           }
> +         else if (final_mask)
>             {
>               /* Emit:
>                    MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
> @@ -9598,9 +9639,11 @@ vectorizable_load (vec_info *vinfo,
>    enum dr_alignment_support alignment_support_scheme;
>    int misalignment;
>    poly_int64 poffset;
> +  internal_fn lanes_ifn;
>    if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
>                              ncopies, &memory_access_type, &poffset,
> -                            &alignment_support_scheme, &misalignment, &gs_info))
> +                            &alignment_support_scheme, &misalignment, &gs_info,
> +                            &lanes_ifn))
>      return false;
>
>    if (mask)
> @@ -10386,6 +10429,8 @@ vectorizable_load (vec_info *vinfo,
>           tree vec_array = create_vector_array (vectype, vec_num);
>
>           tree final_mask = NULL_TREE;
> +         tree final_len = NULL_TREE;
> +         tree bias = NULL_TREE;
>           if (loop_masks)
>             final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
>                                              ncopies, vectype, j);
> @@ -10393,8 +10438,36 @@ vectorizable_load (vec_info *vinfo,
>             final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
>                                            vec_mask, gsi);
>
> +         if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
> +           {
> +             if (loop_lens)
> +               final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> +                                              ncopies, vectype, j, 1);
> +             else
> +               final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> +             signed char biasval
> +               = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> +             bias = build_int_cst (intQI_type_node, biasval);
> +             if (!final_mask)
> +               {
> +                 mask_vectype = truth_type_for (vectype);
> +                 final_mask = build_minus_one_cst (mask_vectype);
> +               }
> +           }
> +
>           gcall *call;
> -         if (final_mask)
> +         if (final_len && final_mask)
> +           {
> +             /* Emit:
> +                  VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
> +                                                   VEC_MASK, LEN, BIAS).  */
> +             unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
> +             tree alias_ptr = build_int_cst (ref_type, align);
> +             call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
> +                                                dataref_ptr, alias_ptr,
> +                                                final_mask, final_len, bias);
> +           }
> +         else if (final_mask)
>             {
>               /* Emit:
>                    VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 1de144988c8..53a3d78d545 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2297,9 +2297,9 @@ extern tree bump_vector_ptr (vec_info *, tree, gimple *, gimple_stmt_iterator *,
>  extern void vect_copy_ref_info (tree, tree);
>  extern tree vect_create_destination_var (tree, tree);
>  extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
> -extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
> +extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
>  extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
> -extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
> +extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
>  extern void vect_permute_store_chain (vec_info *, vec<tree> &,
>                                        unsigned int, stmt_vec_info,
>                                        gimple_stmt_iterator *, vec<tree> *);

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)