On Wed, 27 Nov 2024, Tamar Christina wrote:

> Hi All,
>
> The testcase
>
> #include <stdint.h>
> #include <string.h>
>
> #define N 8
> #define L 8
>
> void f(const uint8_t * restrict seq1,
>        const uint8_t *idx, uint8_t *seq_out) {
>   for (int i = 0; i < L; ++i) {
>     uint8_t h = idx[i];
>     memcpy((void *)&seq_out[i * N], (const void *)&seq1[h * N / 2], N / 2);
>   }
> }
>
> compiled at -O3 -mcpu=neoverse-n1+sve
>
> miscompiles to:
>
>   ld1w    z31.s, p3/z, [x23, z29.s, sxtw]
>   ld1w    z29.s, p7/z, [x23, z30.s, sxtw]
>   st1w    z29.s, p7, [x24, z12.s, sxtw]
>   st1w    z31.s, p7, [x24, z12.s, sxtw]
>
> rather than:
>
>   ld1w    z31.s, p3/z, [x23, z29.s, sxtw]
>   ld1w    z29.s, p7/z, [x23, z30.s, sxtw]
>   st1w    z29.s, p7, [x24, z12.s, sxtw]
>   addvl   x3, x24, #2
>   st1w    z31.s, p3, [x3, z12.s, sxtw]
>
> Two things go wrong: the wrong mask is used, and the address pointers for
> the stores are wrong.
>
> This happens because the codegen loop in vectorizable_store is a nested
> loop, where the outer loop iterates over ncopies and the inner loop over
> vec_num.
>
> For SLP ncopies == 1 and vec_num == SLP_NUM_STMS, but the loop mask is
> determined by the outer loop index alone and the pointer address is only
> updated in the outer loop.
>
> As such, for SLP we always use the same predicate and the same memory
> location.  This patch flattens the two loops, iterating over
> ncopies * vec_num instead, and simplifies the indexing.
>
> This does not fully fix the gcc_r miscompile in SPECCPU 2017, as the error
> moves somewhere else.  I will look at that next, but this fixes some other
> libraries that also started failing.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf, x86_64-pc-linux-gnu -m32, -m64 with no issues.
>
> Ok for master?
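[Editorial note: to make the indexing problem above concrete, here is a minimal
standalone sketch.  It is illustrative only and not the GCC code; NCOPIES,
VEC_NUM and the mask names are made up.  The nested shape picks the loop mask
and the base offset from the outer index j alone, so under SLP (ncopies == 1)
every inner iteration reuses the same mask and address, matching the duplicated
st1w above; the flattened shape indexes both by the combined statement number.]

#include <stdio.h>

#define NCOPIES 1   /* SLP: a single copy ...            */
#define VEC_NUM 2   /* ... but two vector statements.    */

int main (void)
{
  const char *mask[NCOPIES * VEC_NUM] = { "p3", "p7" };

  /* Old shape: nested loops; mask and offset depend only on j.  */
  for (int j = 0; j < NCOPIES; j++)
    for (int i = 0; i < VEC_NUM; i++)
      printf ("nested:    stmt %d uses mask %s, base offset %d\n",
              j * VEC_NUM + i, mask[j], j);

  /* New shape: one flat loop over ncopies * vec_num, indexed by j.  */
  for (int j = 0; j < NCOPIES * VEC_NUM; j++)
    printf ("flattened: stmt %d uses mask %s, base offset %d\n",
            j, mask[j], j);
  return 0;
}

Running the sketch shows the nested version emitting both statements with the
same mask and offset, while the flattened version advances both per statement.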
OK.

Thanks,
Richard.

> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	PR tree-optimization/117557
> 	* tree-vect-stmts.cc (vectorizable_store): Flatten the ncopies and
> 	vec_num loops.
>
> gcc/testsuite/ChangeLog:
>
> 	PR tree-optimization/117557
> 	* gcc.target/aarch64/pr117557.c: New test.
>
> ---
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr117557.c b/gcc/testsuite/gcc.target/aarch64/pr117557.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..80b3fde41109988db70eafd715224df0b0029cd1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr117557.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mcpu=neoverse-n1+sve -fdump-tree-vect" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include <stdint.h>
> +#include <string.h>
> +
> +#define N 8
> +#define L 8
> +
> +/*
> +**f:
> +**	...
> +**	ld1w	z[0-9]+.s, p([0-9]+)/z, \[x[0-9]+, z[0-9]+.s, sxtw\]
> +**	ld1w	z[0-9]+.s, p([0-9]+)/z, \[x[0-9]+, z[0-9]+.s, sxtw\]
> +**	st1w	z[0-9]+.s, p\1, \[x[0-9]+, z[0-9]+.s, sxtw\]
> +**	incb	x([0-9]+), all, mul #2
> +**	st1w	z[0-9]+.s, p\2, \[x\3, z[0-9]+.s, sxtw\]
> +**	ret
> +**	...
> +*/
> +void f(const uint8_t * restrict seq1,
> +       const uint8_t *idx, uint8_t *seq_out) {
> +  for (int i = 0; i < L; ++i) {
> +    uint8_t h = idx[i];
> +    memcpy((void *)&seq_out[i * N], (const void *)&seq1[h * N / 2], N / 2);
> +  }
> +}
> +
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index c2d5818b2786123fac7afe290d85c7dd2bda4308..4759c274f3ccbb111a907576539b2a8efb7726a3 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -9228,7 +9228,8 @@ vectorizable_store (vec_info *vinfo,
>        gcc_assert (!grouped_store);
>        auto_vec<tree> vec_offsets;
>        unsigned int inside_cost = 0, prologue_cost = 0;
> -      for (j = 0; j < ncopies; j++)
> +      int num_stmts = ncopies * vec_num;
> +      for (j = 0; j < num_stmts; j++)
> 	{
> 	  gimple *new_stmt;
> 	  if (j == 0)
> @@ -9246,14 +9247,14 @@ vectorizable_store (vec_info *vinfo,
> 		    vect_get_slp_defs (op_node, gvec_oprnds[0]);
> 		  else
> 		    vect_get_vec_defs_for_operand (vinfo, first_stmt_info,
> -						   ncopies, op, gvec_oprnds[0]);
> +						   num_stmts, op, gvec_oprnds[0]);
> 		  if (mask)
> 		    {
> 		      if (slp_node)
> 			vect_get_slp_defs (mask_node, &vec_masks);
> 		      else
> 			vect_get_vec_defs_for_operand (vinfo, stmt_info,
> -						       ncopies,
> +						       num_stmts,
> 						       mask, &vec_masks,
> 						       mask_vectype);
> 		    }
> @@ -9279,281 +9280,280 @@ vectorizable_store (vec_info *vinfo,
> 	    }
> 
> 	  new_stmt = NULL;
> -	  for (i = 0; i < vec_num; ++i)
> +	  if (!costing_p)
> 	    {
> -	      if (!costing_p)
> -		{
> -		  vec_oprnd = (*gvec_oprnds[0])[vec_num * j + i];
> -		  if (mask)
> -		    vec_mask = vec_masks[vec_num * j + i];
> -		  /* We should have catched mismatched types earlier.  */
> -		  gcc_assert (useless_type_conversion_p (vectype,
> -							 TREE_TYPE (vec_oprnd)));
> -		}
> -	      unsigned HOST_WIDE_INT align;
> -	      tree final_mask = NULL_TREE;
> -	      tree final_len = NULL_TREE;
> -	      tree bias = NULL_TREE;
> -	      if (!costing_p)
> +	      vec_oprnd = (*gvec_oprnds[0])[j];
> +	      if (mask)
> +		vec_mask = vec_masks[j];
> +	      /* We should have catched mismatched types earlier.  */
> +	      gcc_assert (useless_type_conversion_p (vectype,
> +						     TREE_TYPE (vec_oprnd)));
> +	    }
> +	  unsigned HOST_WIDE_INT align;
> +	  tree final_mask = NULL_TREE;
> +	  tree final_len = NULL_TREE;
> +	  tree bias = NULL_TREE;
> +	  if (!costing_p)
> +	    {
> +	      if (loop_masks)
> +		final_mask = vect_get_loop_mask (loop_vinfo, gsi,
> +						 loop_masks, num_stmts,
> +						 vectype, j);
> +	      if (vec_mask)
> +		final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
> +					       final_mask, vec_mask, gsi);
> +	    }
> +
> +	  if (gs_info.ifn != IFN_LAST)
> +	    {
> +	      if (costing_p)
> 		{
> -		  if (loop_masks)
> -		    final_mask = vect_get_loop_mask (loop_vinfo, gsi,
> -						     loop_masks,
> -						     ncopies * vec_num,
> -						     vectype, j);
> -		  if (vec_mask)
> -		    final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
> -						   final_mask, vec_mask, gsi);
> +		  unsigned int cnunits = vect_nunits_for_cost (vectype);
> +		  inside_cost
> +		    += record_stmt_cost (cost_vec, cnunits, scalar_store,
> +					 stmt_info, slp_node, 0,
> +					 vect_body);
> +		  continue;
> 		}
> 
> -	      if (gs_info.ifn != IFN_LAST)
> -		{
> -		  if (costing_p)
> -		    {
> -		      unsigned int cnunits = vect_nunits_for_cost (vectype);
> -		      inside_cost
> -			+= record_stmt_cost (cost_vec, cnunits, scalar_store,
> -					     stmt_info, slp_node, 0,
> -					     vect_body);
> -		      continue;
> -		    }
> +	      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> +		vec_offset = vec_offsets[j];
> 
> -		  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> -		    vec_offset = vec_offsets[vec_num * j + i];
> -		  tree scale = size_int (gs_info.scale);
> +	      tree scale = size_int (gs_info.scale);
> 
> -		  if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
> -		    {
> -		      if (loop_lens)
> -			final_len = vect_get_loop_len (loop_vinfo, gsi,
> -						       loop_lens,
> -						       ncopies * vec_num,
> -						       vectype, j, 1);
> -		      else
> -			final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> -		      signed char biasval
> -			= LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> -		      bias = build_int_cst (intQI_type_node, biasval);
> -		      if (!final_mask)
> -			{
> -			  mask_vectype = truth_type_for (vectype);
> -			  final_mask = build_minus_one_cst (mask_vectype);
> -			}
> -		    }
> +	      if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
> +		{
> +		  if (loop_lens)
> +		    final_len = vect_get_loop_len (loop_vinfo, gsi,
> +						   loop_lens, num_stmts,
> +						   vectype, j, 1);
> +		  else
> +		    final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> 
> -		  gcall *call;
> -		  if (final_len && final_mask)
> +		  signed char biasval
> +		    = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> +		  bias = build_int_cst (intQI_type_node, biasval);
> +		  if (!final_mask)
> 		    {
> -		      if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
> -			call = gimple_build_call_internal (
> -			  IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
> -			  vec_offset, scale, vec_oprnd, final_mask, final_len,
> -			  bias);
> -		      else
> -			/* Non-vector offset indicates that prefer to take
> -			   MASK_LEN_STRIDED_STORE instead of the
> -			   IFN_MASK_SCATTER_STORE with direct stride arg.  */
> -			call = gimple_build_call_internal (
> -			  IFN_MASK_LEN_STRIDED_STORE, 6, dataref_ptr,
> -			  vec_offset, vec_oprnd, final_mask, final_len, bias);
> +		      mask_vectype = truth_type_for (vectype);
> +		      final_mask = build_minus_one_cst (mask_vectype);
> 		    }
> -		  else if (final_mask)
> -		    call = gimple_build_call_internal
> -			     (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
> -			      vec_offset, scale, vec_oprnd, final_mask);
> +		}
> +
> +	      gcall *call;
> +	      if (final_len && final_mask)
> +		{
> +		  if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
> +		    call = gimple_build_call_internal (
> +		      IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
> +		      vec_offset, scale, vec_oprnd, final_mask, final_len,
> +		      bias);
> 		  else
> -		    call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
> -						       dataref_ptr, vec_offset,
> -						       scale, vec_oprnd);
> -		  gimple_call_set_nothrow (call, true);
> -		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
> -		  new_stmt = call;
> +		    /* Non-vector offset indicates that prefer to take
> +		       MASK_LEN_STRIDED_STORE instead of the
> +		       IFN_MASK_SCATTER_STORE with direct stride arg.  */
> +		    call = gimple_build_call_internal (
> +		      IFN_MASK_LEN_STRIDED_STORE, 6, dataref_ptr,
> +		      vec_offset, vec_oprnd, final_mask, final_len, bias);
> 		}
> -	      else if (gs_info.decl)
> +	      else if (final_mask)
> +		call = gimple_build_call_internal
> +			 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
> +			  vec_offset, scale, vec_oprnd, final_mask);
> +	      else
> +		call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
> +						   dataref_ptr, vec_offset,
> +						   scale, vec_oprnd);
> +	      gimple_call_set_nothrow (call, true);
> +	      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
> +	      new_stmt = call;
> +	    }
> +	  else if (gs_info.decl)
> +	    {
> +	      /* The builtin decls path for scatter is legacy, x86 only.  */
> +	      gcc_assert (nunits.is_constant ()
> +			  && (!final_mask
> +			      || SCALAR_INT_MODE_P
> +				   (TYPE_MODE (TREE_TYPE (final_mask)))));
> +	      if (costing_p)
> 		{
> -		  /* The builtin decls path for scatter is legacy, x86 only.  */
> -		  gcc_assert (nunits.is_constant ()
> -			      && (!final_mask
> -				  || SCALAR_INT_MODE_P
> -				       (TYPE_MODE (TREE_TYPE (final_mask)))));
> -		  if (costing_p)
> -		    {
> -		      unsigned int cnunits = vect_nunits_for_cost (vectype);
> -		      inside_cost
> -			+= record_stmt_cost (cost_vec, cnunits, scalar_store,
> -					     stmt_info, slp_node, 0, vect_body);
> -		      continue;
> -		    }
> -		  poly_uint64 offset_nunits
> -		    = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
> -		  if (known_eq (nunits, offset_nunits))
> -		    {
> -		      new_stmt = vect_build_one_scatter_store_call
> +		  unsigned int cnunits = vect_nunits_for_cost (vectype);
> +		  inside_cost
> +		    += record_stmt_cost (cost_vec, cnunits, scalar_store,
> +					 stmt_info, slp_node, 0, vect_body);
> +		  continue;
> +		}
> +
> +	      poly_uint64 offset_nunits
> +		= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
> +	      if (known_eq (nunits, offset_nunits))
> +		{
> +		  new_stmt = vect_build_one_scatter_store_call
> 				(vinfo, stmt_info, gsi, &gs_info,
> -				 dataref_ptr, vec_offsets[vec_num * j + i],
> +				 dataref_ptr, vec_offsets[j],
> 				 vec_oprnd, final_mask);
> -		      vect_finish_stmt_generation (vinfo, stmt_info,
> -						   new_stmt, gsi);
> -		    }
> -		  else if (known_eq (nunits, offset_nunits * 2))
> -		    {
> -		      /* We have a offset vector with half the number of
> -			 lanes but the builtins will store full vectype
> -			 data from the lower lanes.  */
> -		      new_stmt = vect_build_one_scatter_store_call
> +		  vect_finish_stmt_generation (vinfo, stmt_info,
> +					       new_stmt, gsi);
> +		}
> +	      else if (known_eq (nunits, offset_nunits * 2))
> +		{
> +		  /* We have a offset vector with half the number of
> +		     lanes but the builtins will store full vectype
> +		     data from the lower lanes.  */
> +		  new_stmt = vect_build_one_scatter_store_call
> 				(vinfo, stmt_info, gsi, &gs_info,
> -				 dataref_ptr,
> -				 vec_offsets[2 * vec_num * j + 2 * i],
> +				 dataref_ptr, vec_offsets[2 * j],
> 				 vec_oprnd, final_mask);
> -		      vect_finish_stmt_generation (vinfo, stmt_info,
> +		  vect_finish_stmt_generation (vinfo, stmt_info,
> 					       new_stmt, gsi);
> -		      int count = nunits.to_constant ();
> -		      vec_perm_builder sel (count, count, 1);
> -		      sel.quick_grow (count);
> -		      for (int i = 0; i < count; ++i)
> -			sel[i] = i | (count / 2);
> -		      vec_perm_indices indices (sel, 2, count);
> -		      tree perm_mask
> -			= vect_gen_perm_mask_checked (vectype, indices);
> -		      new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
> -						      vec_oprnd, vec_oprnd,
> -						      perm_mask);
> -		      vec_oprnd = make_ssa_name (vectype);
> -		      gimple_set_lhs (new_stmt, vec_oprnd);
> -		      vect_finish_stmt_generation (vinfo, stmt_info,
> -						   new_stmt, gsi);
> -		      if (final_mask)
> -			{
> -			  new_stmt = gimple_build_assign (NULL_TREE,
> -							  VEC_UNPACK_HI_EXPR,
> -							  final_mask);
> -			  final_mask = make_ssa_name
> +		  int count = nunits.to_constant ();
> +		  vec_perm_builder sel (count, count, 1);
> +		  sel.quick_grow (count);
> +		  for (int i = 0; i < count; ++i)
> +		    sel[i] = i | (count / 2);
> +		  vec_perm_indices indices (sel, 2, count);
> +		  tree perm_mask
> +		    = vect_gen_perm_mask_checked (vectype, indices);
> +		  new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
> +						  vec_oprnd, vec_oprnd,
> +						  perm_mask);
> +		  vec_oprnd = make_ssa_name (vectype);
> +		  gimple_set_lhs (new_stmt, vec_oprnd);
> +		  vect_finish_stmt_generation (vinfo, stmt_info,
> +					       new_stmt, gsi);
> +		  if (final_mask)
> +		    {
> +		      new_stmt = gimple_build_assign (NULL_TREE,
> +						      VEC_UNPACK_HI_EXPR,
> +						      final_mask);
> +		      final_mask = make_ssa_name
> 				     (truth_type_for (gs_info.offset_vectype));
> -			  gimple_set_lhs (new_stmt, final_mask);
> -			  vect_finish_stmt_generation (vinfo, stmt_info,
> -						       new_stmt, gsi);
> +		      gimple_set_lhs (new_stmt, final_mask);
> +		      vect_finish_stmt_generation (vinfo, stmt_info,
> +						   new_stmt, gsi);
> 		    }
> -		      new_stmt = vect_build_one_scatter_store_call
> -				(vinfo, stmt_info, gsi, &gs_info,
> -				 dataref_ptr,
> -				 vec_offsets[2 * vec_num * j + 2 * i + 1],
> -				 vec_oprnd, final_mask);
> -		      vect_finish_stmt_generation (vinfo, stmt_info,
> -						   new_stmt, gsi);
> -		    }
> -		  else if (known_eq (nunits * 2, offset_nunits))
> -		    {
> -		      /* We have a offset vector with double the number of
> -			 lanes.  Select the low/high part accordingly.  */
> -		      vec_offset = vec_offsets[(vec_num * j + i) / 2];
> -		      if ((vec_num * j + i) & 1)
> -			{
> -			  int count = offset_nunits.to_constant ();
> -			  vec_perm_builder sel (count, count, 1);
> -			  sel.quick_grow (count);
> -			  for (int i = 0; i < count; ++i)
> -			    sel[i] = i | (count / 2);
> -			  vec_perm_indices indices (sel, 2, count);
> -			  tree perm_mask = vect_gen_perm_mask_checked
> -					     (TREE_TYPE (vec_offset), indices);
> -			  new_stmt = gimple_build_assign (NULL_TREE,
> -							  VEC_PERM_EXPR,
> -							  vec_offset,
> -							  vec_offset,
> -							  perm_mask);
> -			  vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
> -			  gimple_set_lhs (new_stmt, vec_offset);
> -			  vect_finish_stmt_generation (vinfo, stmt_info,
> -						       new_stmt, gsi);
> -			}
> -		      new_stmt = vect_build_one_scatter_store_call
> +
> +		  new_stmt = vect_build_one_scatter_store_call
> +				(vinfo, stmt_info, gsi, &gs_info,
> +				 dataref_ptr, vec_offsets[2 * j + 1],
> +				 vec_oprnd, final_mask);
> +		  vect_finish_stmt_generation (vinfo, stmt_info,
> +					       new_stmt, gsi);
> +		}
> +	      else if (known_eq (nunits * 2, offset_nunits))
> +		{
> +		  /* We have a offset vector with double the number of
> +		     lanes.  Select the low/high part accordingly.  */
> +		  vec_offset = vec_offsets[j / 2];
> +		  if (j & 1)
> +		    {
> +		      int count = offset_nunits.to_constant ();
> +		      vec_perm_builder sel (count, count, 1);
> +		      sel.quick_grow (count);
> +		      for (int i = 0; i < count; ++i)
> +			sel[i] = i | (count / 2);
> +		      vec_perm_indices indices (sel, 2, count);
> +		      tree perm_mask = vect_gen_perm_mask_checked
> +					 (TREE_TYPE (vec_offset), indices);
> +		      new_stmt = gimple_build_assign (NULL_TREE,
> +						      VEC_PERM_EXPR,
> +						      vec_offset,
> +						      vec_offset,
> +						      perm_mask);
> +		      vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
> +		      gimple_set_lhs (new_stmt, vec_offset);
> +		      vect_finish_stmt_generation (vinfo, stmt_info,
> +						   new_stmt, gsi);
> +		    }
> +
> +		  new_stmt = vect_build_one_scatter_store_call
> 				(vinfo, stmt_info, gsi, &gs_info,
> 				 dataref_ptr, vec_offset,
> 				 vec_oprnd, final_mask);
> -		      vect_finish_stmt_generation (vinfo, stmt_info,
> -						   new_stmt, gsi);
> -		    }
> -		  else
> -		    gcc_unreachable ();
> -		}
> -	      else
> +		  vect_finish_stmt_generation (vinfo, stmt_info,
> +					       new_stmt, gsi);
> +		}
> +	      else
> +		gcc_unreachable ();
> +	    }
> +	  else
> +	    {
> +	      /* Emulated scatter.  */
> +	      gcc_assert (!final_mask);
> +	      if (costing_p)
> 		{
> -		  /* Emulated scatter.  */
> -		  gcc_assert (!final_mask);
> -		  if (costing_p)
> -		    {
> -		      unsigned int cnunits = vect_nunits_for_cost (vectype);
> -		      /* For emulated scatter N offset vector element extracts
> -			 (we assume the scalar scaling and ptr + offset add is
> -			 consumed by the load).  */
> -		      inside_cost
> -			+= record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
> -					     stmt_info, slp_node, 0, vect_body);
> -		      /* N scalar stores plus extracting the elements.  */
> -		      inside_cost
> -			+= record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
> -					     stmt_info, slp_node, 0, vect_body);
> -		      inside_cost
> -			+= record_stmt_cost (cost_vec, cnunits, scalar_store,
> -					     stmt_info, slp_node, 0, vect_body);
> -		      continue;
> -		    }
> +		  unsigned int cnunits = vect_nunits_for_cost (vectype);
> +		  /* For emulated scatter N offset vector element extracts
> +		     (we assume the scalar scaling and ptr + offset add is
> +		     consumed by the load).  */
> +		  inside_cost
> +		    += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
> +					 stmt_info, slp_node, 0, vect_body);
> +		  /* N scalar stores plus extracting the elements.  */
> +		  inside_cost
> +		    += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
> +					 stmt_info, slp_node, 0, vect_body);
> +		  inside_cost
> +		    += record_stmt_cost (cost_vec, cnunits, scalar_store,
> +					 stmt_info, slp_node, 0, vect_body);
> +		  continue;
> +		}
> 
> -		  unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
> -		  unsigned HOST_WIDE_INT const_offset_nunits
> -		    = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
> -		  vec<constructor_elt, va_gc> *ctor_elts;
> -		  vec_alloc (ctor_elts, const_nunits);
> -		  gimple_seq stmts = NULL;
> -		  tree elt_type = TREE_TYPE (vectype);
> -		  unsigned HOST_WIDE_INT elt_size
> -		    = tree_to_uhwi (TYPE_SIZE (elt_type));
> -		  /* We support offset vectors with more elements
> -		     than the data vector for now.  */
> -		  unsigned HOST_WIDE_INT factor
> -		    = const_offset_nunits / const_nunits;
> -		  vec_offset = vec_offsets[(vec_num * j + i) / factor];
> -		  unsigned elt_offset
> -		    = ((vec_num * j + i) % factor) * const_nunits;
> -		  tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
> -		  tree scale = size_int (gs_info.scale);
> -		  align = get_object_alignment (DR_REF (first_dr_info->dr));
> -		  tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
> -		  for (unsigned k = 0; k < const_nunits; ++k)
> -		    {
> -		      /* Compute the offsetted pointer.  */
> -		      tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
> -					      bitsize_int (k + elt_offset));
> -		      tree idx
> -			= gimple_build (&stmts, BIT_FIELD_REF, idx_type,
> -					vec_offset, TYPE_SIZE (idx_type), boff);
> -		      idx = gimple_convert (&stmts, sizetype, idx);
> -		      idx = gimple_build (&stmts, MULT_EXPR, sizetype,
> -					  idx, scale);
> -		      tree ptr
> -			= gimple_build (&stmts, PLUS_EXPR,
> -					TREE_TYPE (dataref_ptr),
> -					dataref_ptr, idx);
> -		      ptr = gimple_convert (&stmts, ptr_type_node, ptr);
> -		      /* Extract the element to be stored.  */
> -		      tree elt
> -			= gimple_build (&stmts, BIT_FIELD_REF,
> -					TREE_TYPE (vectype),
> -					vec_oprnd, TYPE_SIZE (elt_type),
> -					bitsize_int (k * elt_size));
> -		      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> -		      stmts = NULL;
> -		      tree ref
> -			= build2 (MEM_REF, ltype, ptr,
> -				  build_int_cst (ref_type, 0));
> -		      new_stmt = gimple_build_assign (ref, elt);
> -		      vect_finish_stmt_generation (vinfo, stmt_info,
> -						   new_stmt, gsi);
> -		    }
> -		  if (slp)
> -		    slp_node->push_vec_def (new_stmt);
> +	      unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
> +	      unsigned HOST_WIDE_INT const_offset_nunits
> +		= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
> +	      vec<constructor_elt, va_gc> *ctor_elts;
> +	      vec_alloc (ctor_elts, const_nunits);
> +	      gimple_seq stmts = NULL;
> +	      tree elt_type = TREE_TYPE (vectype);
> +	      unsigned HOST_WIDE_INT elt_size
> +		= tree_to_uhwi (TYPE_SIZE (elt_type));
> +	      /* We support offset vectors with more elements
> +		 than the data vector for now.  */
> +	      unsigned HOST_WIDE_INT factor
> +		= const_offset_nunits / const_nunits;
> +	      vec_offset = vec_offsets[j / factor];
> +	      unsigned elt_offset
> +		= (j % factor) * const_nunits;
> +	      tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
> +	      tree scale = size_int (gs_info.scale);
> +	      align = get_object_alignment (DR_REF (first_dr_info->dr));
> +	      tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
> +	      for (unsigned k = 0; k < const_nunits; ++k)
> +		{
> +		  /* Compute the offsetted pointer.  */
> +		  tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
> +					  bitsize_int (k + elt_offset));
> +		  tree idx
> +		    = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
> +				    vec_offset, TYPE_SIZE (idx_type), boff);
> +		  idx = gimple_convert (&stmts, sizetype, idx);
> +		  idx = gimple_build (&stmts, MULT_EXPR, sizetype,
> +				      idx, scale);
> +		  tree ptr
> +		    = gimple_build (&stmts, PLUS_EXPR,
> +				    TREE_TYPE (dataref_ptr),
> +				    dataref_ptr, idx);
> +		  ptr = gimple_convert (&stmts, ptr_type_node, ptr);
> +		  /* Extract the element to be stored.  */
> +		  tree elt
> +		    = gimple_build (&stmts, BIT_FIELD_REF,
> +				    TREE_TYPE (vectype),
> +				    vec_oprnd, TYPE_SIZE (elt_type),
> +				    bitsize_int (k * elt_size));
> +		  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> +		  stmts = NULL;
> +		  tree ref
> +		    = build2 (MEM_REF, ltype, ptr,
> +			      build_int_cst (ref_type, 0));
> +		  new_stmt = gimple_build_assign (ref, elt);
> +		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
> 		}
> +
> +	      if (slp)
> +		slp_node->push_vec_def (new_stmt);
> 	    }
> +
> 	  if (!slp && !costing_p)
> 	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
> 	}
>
>

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)