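I was too eager in removing the ncopies calculations in g:10833849b55. When emulating gather/scatter, the number of vector copies needed for the offsets can differ from the number needed for the data, because the offset vector type need not have the same number of elements as the data vector type; the caller's ncopies therefore cannot be reused for the offsets. This patch restores the original calculation, which derives the offset ncopies from the offset vector type.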
Tested on aarch64-linux-gnu and x86_64-linux-gnu. Pushed as obvious, since it's essentially reverting part of my earlier patch (except for obvious adjustments to keep slp_node). Richard gcc/ PR tree-optimization/103494 * tree-vect-stmts.c (vect_get_gather_scatter_ops): Remove ncopies argument and calculate ncopies from gs_info->offset_vectype where necessary. (vectorizable_store, vectorizable_load): Update accordingly. gcc/testsuite/ PR tree-optimization/103494 * gcc.dg/vect/pr103494.c: New test. * g++.dg/vect/pr103494.cc: Likewise. --- gcc/testsuite/g++.dg/vect/pr103494.cc | 26 ++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/vect/pr103494.c | 14 ++++++++++++++ gcc/tree-vect-stmts.c | 21 ++++++++++++--------- 3 files changed, 52 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/g++.dg/vect/pr103494.cc create mode 100644 gcc/testsuite/gcc.dg/vect/pr103494.c diff --git a/gcc/testsuite/g++.dg/vect/pr103494.cc b/gcc/testsuite/g++.dg/vect/pr103494.cc new file mode 100644 index 00000000000..c0b078105c2 --- /dev/null +++ b/gcc/testsuite/g++.dg/vect/pr103494.cc @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3" } */ + +void glFinish(); +struct _Vector_base { + struct { + unsigned _M_start; + } _M_impl; +}; +class vector : _Vector_base { +public: + vector(long) {} + unsigned *data() { return &_M_impl._M_start; } +}; +void *PutBitsIndexedImpl_color_table; +int PutBitsIndexedImpl_dstRectHeight; +char *PutBitsIndexedImpl_src_ptr; +void PutBitsIndexedImpl() { + vector unpacked_buf(PutBitsIndexedImpl_dstRectHeight); + unsigned *dst_ptr = unpacked_buf.data(); + for (int x; x; x++) { + char i = *PutBitsIndexedImpl_src_ptr++; + dst_ptr[x] = static_cast<int *>(PutBitsIndexedImpl_color_table)[i]; + } + glFinish(); +} diff --git a/gcc/testsuite/gcc.dg/vect/pr103494.c b/gcc/testsuite/gcc.dg/vect/pr103494.c new file mode 100644 index 00000000000..b544bf2379c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr103494.c @@ -0,0 +1,14 @@ +/* { dg-do 
compile } */ +/* { dg-additional-options "-O3" } */ + +typedef int T1; +typedef signed char T2; + +T1 +f (T1 *d, T2 *x, int n) +{ + unsigned char res = 0; + for (int i = 0; i < n; ++i) + res += d[x[i]]; + return res; +} diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 8642acbc0b4..9726450ab2d 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -2962,8 +2962,7 @@ vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info, static void vect_get_gather_scatter_ops (loop_vec_info loop_vinfo, class loop *loop, stmt_vec_info stmt_info, - slp_tree slp_node, unsigned int ncopies, - gather_scatter_info *gs_info, + slp_tree slp_node, gather_scatter_info *gs_info, tree *dataref_ptr, vec<tree> *vec_offset) { gimple_seq stmts = NULL; @@ -2978,9 +2977,13 @@ vect_get_gather_scatter_ops (loop_vec_info loop_vinfo, if (slp_node) vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset); else - vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies, - gs_info->offset, vec_offset, - gs_info->offset_vectype); + { + unsigned ncopies + = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype); + vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies, + gs_info->offset, vec_offset, + gs_info->offset_vectype); + } } /* Prepare to implement a grouped or strided load or store using @@ -8149,8 +8152,8 @@ vectorizable_store (vec_info *vinfo, else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) { vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info, - slp_node, ncopies, &gs_info, - &dataref_ptr, &vec_offsets); + slp_node, &gs_info, &dataref_ptr, + &vec_offsets); vec_offset = vec_offsets[0]; } else @@ -9454,8 +9457,8 @@ vectorizable_load (vec_info *vinfo, else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) { vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info, - slp_node, ncopies, &gs_info, - &dataref_ptr, &vec_offsets); + slp_node, &gs_info, &dataref_ptr, + &vec_offsets); } else dataref_ptr -- 2.25.1