Hi, gentle ping again. Thanks!
Kewen on 2019/5/21 上午10:02, Kewen.Lin wrote: > Hi, > > Gentle ping again. Thanks! > > > Kewen > > on 2019/5/5 下午2:15, Kewen.Lin wrote: >> Hi, >> >> I'd like to gentle ping for this patch: >> https://gcc.gnu.org/ml/gcc-patches/2019-03/msg00966.html >> >> OK for trunk now? >> >> Thanks! >> >> on 2019/3/20 上午11:14, Kewen.Lin wrote: >>> Hi, >>> >>> Please refer to below link for previous threads. >>> https://gcc.gnu.org/ml/gcc-patches/2019-03/msg00348.html >>> >>> Comparing to patch v2, I've moved up the vector operation target >>> check upward together with vector type target check. Besides, I >>> ran bootstrap and regtest on powerpc64-linux-gnu (BE), updated >>> testcases' requirements and options for robustness. >>> >>> Is it OK for GCC10? >>> >>> >>> gcc/ChangeLog >>> >>> 2019-03-20 Kewen Lin <li...@gcc.gnu.org> >>> >>> PR target/88497 >>> * tree-ssa-reassoc.c (reassociate_bb): Swap the positions of >>> GIMPLE_BINARY_RHS check and gimple_visited_p check, call new >>> function undistribute_bitref_for_vector. >>> (undistribute_bitref_for_vector): New function. >>> (cleanup_vinfo_map): Likewise. >>> (unsigned_cmp): Likewise. >>> >>> gcc/testsuite/ChangeLog >>> >>> 2019-03-20 Kewen Lin <li...@gcc.gnu.org> >>> >>> * gcc.dg/tree-ssa/pr88497-1.c: New test. >>> * gcc.dg/tree-ssa/pr88497-2.c: Likewise. >>> * gcc.dg/tree-ssa/pr88497-3.c: Likewise. >>> * gcc.dg/tree-ssa/pr88497-4.c: Likewise. >>> * gcc.dg/tree-ssa/pr88497-5.c: Likewise. 
>>> >>> --- >>> gcc/testsuite/gcc.dg/tree-ssa/pr88497-1.c | 44 +++++ >>> gcc/testsuite/gcc.dg/tree-ssa/pr88497-2.c | 33 ++++ >>> gcc/testsuite/gcc.dg/tree-ssa/pr88497-3.c | 33 ++++ >>> gcc/testsuite/gcc.dg/tree-ssa/pr88497-4.c | 33 ++++ >>> gcc/testsuite/gcc.dg/tree-ssa/pr88497-5.c | 33 ++++ >>> gcc/tree-ssa-reassoc.c | 306 >>> +++++++++++++++++++++++++++++- >>> 6 files changed, 477 insertions(+), 5 deletions(-) >>> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr88497-1.c >>> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr88497-2.c >>> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr88497-3.c >>> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr88497-4.c >>> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr88497-5.c >>> >>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr88497-1.c >>> b/gcc/testsuite/gcc.dg/tree-ssa/pr88497-1.c >>> new file mode 100644 >>> index 0000000..99c9af8 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr88497-1.c >>> @@ -0,0 +1,44 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-require-effective-target vect_double } */ >>> +/* { dg-require-effective-target powerpc_vsx_ok { target { powerpc*-*-* } >>> } } */ >>> +/* { dg-options "-O2 -ffast-math" } */ >>> +/* { dg-options "-O2 -ffast-math -mvsx -fdump-tree-reassoc1" { target { >>> powerpc*-*-* } } } */ >>> + >>> +/* To test reassoc can undistribute vector bit_field_ref summation. >>> + >>> + arg1 and arg2 are two arrays whose elements of type vector double. >>> + Assuming: >>> + A0 = arg1[0], A1 = arg1[1], A2 = arg1[2], A3 = arg1[3], >>> + B0 = arg2[0], B1 = arg2[1], B2 = arg2[2], B3 = arg2[3], >>> + >>> + Then: >>> + V0 = A0 * B0, V1 = A1 * B1, V2 = A2 * B2, V3 = A3 * B3, >>> + >>> + reassoc transforms >>> + >>> + accumulator += V0[0] + V0[1] + V1[0] + V1[1] + V2[0] + V2[1] >>> + + V3[0] + V3[1]; >>> + >>> + into: >>> + >>> + T = V0 + V1 + V2 + V3 >>> + accumulator += T[0] + T[1]; >>> + >>> + Fewer bit_field_refs, only two for 128 or more bits vector. 
*/ >>> + >>> +typedef double v2df __attribute__ ((vector_size (16))); >>> +double >>> +test (double accumulator, v2df arg1[], v2df arg2[]) >>> +{ >>> + v2df temp; >>> + temp = arg1[0] * arg2[0]; >>> + accumulator += temp[0] + temp[1]; >>> + temp = arg1[1] * arg2[1]; >>> + accumulator += temp[0] + temp[1]; >>> + temp = arg1[2] * arg2[2]; >>> + accumulator += temp[0] + temp[1]; >>> + temp = arg1[3] * arg2[3]; >>> + accumulator += temp[0] + temp[1]; >>> + return accumulator; >>> +} >>> +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 2 "reassoc1" { target >>> { powerpc*-*-* } } } } */ >>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr88497-2.c >>> b/gcc/testsuite/gcc.dg/tree-ssa/pr88497-2.c >>> new file mode 100644 >>> index 0000000..61ed0bf5 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr88497-2.c >>> @@ -0,0 +1,33 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-require-effective-target vect_float } */ >>> +/* { dg-require-effective-target powerpc_altivec_ok { target { >>> powerpc*-*-* } } } */ >>> +/* { dg-options "-O2 -ffast-math" } */ >>> +/* { dg-options "-O2 -ffast-math -maltivec -fdump-tree-reassoc1" { target >>> { powerpc*-*-* } } } */ >>> + >>> +/* To test reassoc can undistribute vector bit_field_ref on multiplication. >>> + >>> + v1, v2, v3, v4 of type vector float. >>> + >>> + reassoc transforms >>> + >>> + accumulator *= v1[0] * v1[1] * v1[2] * v1[3] * >>> + v2[0] * v2[1] * v2[2] * v2[3] * >>> + v3[0] * v3[1] * v3[2] * v3[3] * >>> + v4[0] * v4[1] * v4[2] * v4[3] ; >>> + >>> + into: >>> + >>> + T = v1 * v2 * v3 * v4; >>> + accumulator *= T[0] * T[1] * T[2] * T[3]; >>> + >>> + Fewer bit_field_refs, only four for 128 or more bits vector. 
*/ >>> + >>> +typedef float v4si __attribute__((vector_size(16))); >>> +float test(float accumulator, v4si v1, v4si v2, v4si v3, v4si v4) { >>> + accumulator *= v1[0] * v1[1] * v1[2] * v1[3]; >>> + accumulator *= v2[0] * v2[1] * v2[2] * v2[3]; >>> + accumulator *= v3[0] * v3[1] * v3[2] * v3[3]; >>> + accumulator *= v4[0] * v4[1] * v4[2] * v4[3]; >>> + return accumulator; >>> +} >>> +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 4 "reassoc1" { target >>> { powerpc*-*-* } } } } */ >>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr88497-3.c >>> b/gcc/testsuite/gcc.dg/tree-ssa/pr88497-3.c >>> new file mode 100644 >>> index 0000000..3790afc >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr88497-3.c >>> @@ -0,0 +1,33 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-require-effective-target vect_int } */ >>> +/* { dg-require-effective-target powerpc_altivec_ok { target { >>> powerpc*-*-* } } } */ >>> +/* { dg-options "-O2 -ffast-math" } */ >>> +/* { dg-options "-O2 -ffast-math -maltivec -fdump-tree-reassoc1" { target >>> { powerpc*-*-* } } } */ >>> + >>> +/* To test reassoc can undistribute vector bit_field_ref on bitwise AND. >>> + >>> + v1, v2, v3, v4 of type vector int. >>> + >>> + reassoc transforms >>> + >>> + accumulator &= v1[0] & v1[1] & v1[2] & v1[3] & >>> + v2[0] & v2[1] & v2[2] & v2[3] & >>> + v3[0] & v3[1] & v3[2] & v3[3] & >>> + v4[0] & v4[1] & v4[2] & v4[3] ; >>> + >>> + into: >>> + >>> + T = v1 & v2 & v3 & v4; >>> + accumulator &= T[0] & T[1] & T[2] & T[3]; >>> + >>> + Fewer bit_field_refs, only four for 128 or more bits vector. 
*/ >>> + >>> +typedef int v4si __attribute__((vector_size(16))); >>> +int test(int accumulator, v4si v1, v4si v2, v4si v3, v4si v4) { >>> + accumulator &= v1[0] & v1[1] & v1[2] & v1[3]; >>> + accumulator &= v2[0] & v2[1] & v2[2] & v2[3]; >>> + accumulator &= v3[0] & v3[1] & v3[2] & v3[3]; >>> + accumulator &= v4[0] & v4[1] & v4[2] & v4[3]; >>> + return accumulator; >>> +} >>> +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 4 "reassoc1" { target >>> { powerpc*-*-* } } } } */ >>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr88497-4.c >>> b/gcc/testsuite/gcc.dg/tree-ssa/pr88497-4.c >>> new file mode 100644 >>> index 0000000..1864aad >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr88497-4.c >>> @@ -0,0 +1,33 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-require-effective-target vect_int } */ >>> +/* { dg-require-effective-target powerpc_altivec_ok { target { >>> powerpc*-*-* } } } */ >>> +/* { dg-options "-O2 -ffast-math" } */ >>> +/* { dg-options "-O2 -ffast-math -maltivec -fdump-tree-reassoc1" { target >>> { powerpc*-*-* } } } */ >>> + >>> +/* To test reassoc can undistribute vector bit_field_ref on bitwise IOR. >>> + >>> + v1, v2, v3, v4 of type vector int. >>> + >>> + reassoc transforms >>> + >>> + accumulator |= v1[0] | v1[1] | v1[2] | v1[3] | >>> + v2[0] | v2[1] | v2[2] | v2[3] | >>> + v3[0] | v3[1] | v3[2] | v3[3] | >>> + v4[0] | v4[1] | v4[2] | v4[3] ; >>> + >>> + into: >>> + >>> + T = v1 | v2 | v3 | v4; >>> + accumulator |= T[0] | T[1] | T[2] | T[3]; >>> + >>> + Fewer bit_field_refs, only four for 128 or more bits vector. 
*/ >>> + >>> +typedef int v4si __attribute__((vector_size(16))); >>> +int test(int accumulator, v4si v1, v4si v2, v4si v3, v4si v4) { >>> + accumulator |= v1[0] | v1[1] | v1[2] | v1[3]; >>> + accumulator |= v2[0] | v2[1] | v2[2] | v2[3]; >>> + accumulator |= v3[0] | v3[1] | v3[2] | v3[3]; >>> + accumulator |= v4[0] | v4[1] | v4[2] | v4[3]; >>> + return accumulator; >>> +} >>> +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 4 "reassoc1" { target >>> { powerpc*-*-* } } } } */ >>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr88497-5.c >>> b/gcc/testsuite/gcc.dg/tree-ssa/pr88497-5.c >>> new file mode 100644 >>> index 0000000..f747372 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr88497-5.c >>> @@ -0,0 +1,33 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-require-effective-target vect_int } */ >>> +/* { dg-require-effective-target powerpc_altivec_ok { target { >>> powerpc*-*-* } } } */ >>> +/* { dg-options "-O2 -ffast-math" } */ >>> +/* { dg-options "-O2 -ffast-math -maltivec -fdump-tree-reassoc1" { target >>> { powerpc*-*-* } } } */ >>> + >>> +/* To test reassoc can undistribute vector bit_field_ref on bitwise XOR. >>> + >>> + v1, v2, v3, v4 of type vector int. >>> + >>> + reassoc transforms >>> + >>> + accumulator ^= v1[0] ^ v1[1] ^ v1[2] ^ v1[3] ^ >>> + v2[0] ^ v2[1] ^ v2[2] ^ v2[3] ^ >>> + v3[0] ^ v3[1] ^ v3[2] ^ v3[3] ^ >>> + v4[0] ^ v4[1] ^ v4[2] ^ v4[3] ; >>> + >>> + into: >>> + >>> + T = v1 ^ v2 ^ v3 ^ v4; >>> + accumulator ^= T[0] ^ T[1] ^ T[2] ^ T[3]; >>> + >>> + Fewer bit_field_refs, only four for 128 or more bits vector. 
*/ >>> + >>> +typedef int v4si __attribute__((vector_size(16))); >>> +int test(int accumulator, v4si v1, v4si v2, v4si v3, v4si v4) { >>> + accumulator ^= v1[0] ^ v1[1] ^ v1[2] ^ v1[3]; >>> + accumulator ^= v2[0] ^ v2[1] ^ v2[2] ^ v2[3]; >>> + accumulator ^= v3[0] ^ v3[1] ^ v3[2] ^ v3[3]; >>> + accumulator ^= v4[0] ^ v4[1] ^ v4[2] ^ v4[3]; >>> + return accumulator; >>> +} >>> +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 4 "reassoc1" { target >>> { powerpc*-*-* } } } } */ >>> diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c >>> index e1c4dfe..a6cd85a 100644 >>> --- a/gcc/tree-ssa-reassoc.c >>> +++ b/gcc/tree-ssa-reassoc.c >>> @@ -1772,6 +1772,295 @@ undistribute_ops_list (enum tree_code opcode, >>> return changed; >>> } >>> >>> +/* Hold the information of one specific VECTOR_TYPE SSA_NAME. >>> + - offsets: for different BIT_FIELD_REF offsets accessing same VECTOR. >>> + - ops_indexes: the index of vec ops* for each relavant BIT_FIELD_REF. >>> */ >>> +struct v_info >>> +{ >>> + auto_vec<unsigned HOST_WIDE_INT, 32> offsets; >>> + auto_vec<unsigned, 32> ops_indexes; >>> +}; >>> + >>> +typedef struct v_info *v_info_ptr; >>> + >>> +/* Comparison function for qsort on unsigned BIT_FIELD_REF offsets. */ >>> +static int >>> +unsigned_cmp (const void *p_i, const void *p_j) >>> +{ >>> + if (*(const unsigned HOST_WIDE_INT *) p_i >>> + >= *(const unsigned HOST_WIDE_INT *) p_j) >>> + return 1; >>> + else >>> + return -1; >>> +} >>> + >>> +/* Cleanup hash map for VECTOR information. */ >>> +static void >>> +cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map) >>> +{ >>> + for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin (); >>> + it != info_map.end (); ++it) >>> + { >>> + v_info_ptr info = (*it).second; >>> + delete info; >>> + (*it).second = NULL; >>> + } >>> +} >>> + >>> +/* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE. >>> + V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... 
Vn[k] >>> + is transformed to >>> + Vs = (V1 + V2 + ... + Vn) >>> + Vs[0] + Vs[1] + ... + Vs[k] >>> + >>> + The basic steps are listed below: >>> + >>> + 1) Check the addition chain *OPS by looking those summands coming from >>> + VECTOR bit_field_ref on VECTOR type. Put the information into >>> + v_info_map for each satisfied summand, using VECTOR SSA_NAME as key. >>> + >>> + 2) For each key (VECTOR SSA_NAME), validate all its BIT_FIELD_REFs are >>> + continous, they can cover the whole VECTOR perfectly without any >>> holes. >>> + Obtain one VECTOR list which contain candidates to be transformed. >>> + >>> + 3) Build the addition statements for all VECTOR candidates, generate >>> + BIT_FIELD_REFs accordingly. >>> + >>> + TODO: >>> + 1) The current implementation restrict all candidate VECTORs should >>> have >>> + the same VECTOR type, but it can be extended into different groups >>> by >>> + VECTOR types in future if any profitable cases found. >>> + 2) The current implementation requires the whole VECTORs should be >>> fully >>> + covered, but it can be extended to support partial, checking >>> adjacent >>> + but not fill the whole, it may need some cost model to define the >>> + boundary to do or not. >>> +*/ >>> +static bool >>> +undistribute_bitref_for_vector (enum tree_code opcode, vec<operand_entry >>> *> *ops, >>> + struct loop *loop) >>> +{ >>> + if (ops->length () <= 1) >>> + return false; >>> + >>> + if (opcode != PLUS_EXPR && opcode != MULT_EXPR && opcode != BIT_XOR_EXPR >>> + && opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) >>> + return false; >>> + >>> + hash_map<tree, v_info_ptr> v_info_map; >>> + operand_entry *oe1; >>> + unsigned i; >>> + >>> + /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put >>> the >>> + information into map. 
*/ >>> + FOR_EACH_VEC_ELT (*ops, i, oe1) >>> + { >>> + enum tree_code dcode; >>> + gimple *oe1def; >>> + >>> + if (TREE_CODE (oe1->op) != SSA_NAME) >>> + continue; >>> + oe1def = SSA_NAME_DEF_STMT (oe1->op); >>> + if (!is_gimple_assign (oe1def)) >>> + continue; >>> + dcode = gimple_assign_rhs_code (oe1def); >>> + if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, >>> loop)) >>> + continue; >>> + >>> + tree rhs = gimple_op (oe1def, 1); >>> + tree op0 = TREE_OPERAND (rhs, 0); >>> + tree vec_type = TREE_TYPE (op0); >>> + >>> + if (TREE_CODE (op0) != SSA_NAME || TREE_CODE (vec_type) != >>> VECTOR_TYPE) >>> + continue; >>> + >>> + tree op1 = TREE_OPERAND (rhs, 1); >>> + tree op2 = TREE_OPERAND (rhs, 2); >>> + >>> + tree elem_type = TREE_TYPE (vec_type); >>> + unsigned HOST_WIDE_INT size = TREE_INT_CST_LOW (TYPE_SIZE >>> (elem_type)); >>> + if (size != TREE_INT_CST_LOW (op1)) >>> + continue; >>> + >>> + /* Ignore it if target machine can't support this VECTOR type. */ >>> + if (!VECTOR_MODE_P (TYPE_MODE (vec_type))) >>> + continue; >>> + >>> + /* Ignore it if target machine can't support this type of VECTOR >>> + operation. */ >>> + optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector); >>> + if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing) >>> + continue; >>> + >>> + v_info_ptr *info_ptr = v_info_map.get (op0); >>> + if (info_ptr) >>> + { >>> + v_info_ptr info = *info_ptr; >>> + info->offsets.safe_push (TREE_INT_CST_LOW (op2)); >>> + info->ops_indexes.safe_push (i); >>> + } >>> + else >>> + { >>> + v_info_ptr info = new v_info; >>> + info->offsets.safe_push (TREE_INT_CST_LOW (op2)); >>> + info->ops_indexes.safe_push (i); >>> + v_info_map.put (op0, info); >>> + } >>> + } >>> + >>> + /* At least two VECTOR to combine. */ >>> + if (v_info_map.elements () <= 1) >>> + { >>> + cleanup_vinfo_map (v_info_map); >>> + return false; >>> + } >>> + >>> + /* Use the first VECTOR and its information as the reference. 
>>> + Firstly, we should validate it, that is: >>> + 1) sorted offsets are adjacent, no holes. >>> + 2) can fill the whole VECTOR perfectly. */ >>> + hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin (); >>> + tree ref_vec = (*it).first; >>> + v_info_ptr ref_info = (*it).second; >>> + ref_info->offsets.qsort (unsigned_cmp); >>> + tree vec_type = TREE_TYPE (ref_vec); >>> + tree elem_type = TREE_TYPE (vec_type); >>> + unsigned HOST_WIDE_INT elem_size = TREE_INT_CST_LOW (TYPE_SIZE >>> (elem_type)); >>> + unsigned HOST_WIDE_INT curr; >>> + unsigned HOST_WIDE_INT prev = ref_info->offsets[0]; >>> + >>> + /* Continous check. */ >>> + FOR_EACH_VEC_ELT_FROM (ref_info->offsets, i, curr, 1) >>> + { >>> + if (curr != (prev + elem_size)) >>> + { >>> + cleanup_vinfo_map (v_info_map); >>> + return false; >>> + } >>> + prev = curr; >>> + } >>> + >>> + /* Check whether fill the whole. */ >>> + if ((prev + elem_size) != TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE >>> (ref_vec)))) >>> + { >>> + cleanup_vinfo_map (v_info_map); >>> + return false; >>> + } >>> + >>> + auto_vec<tree> vectors (v_info_map.elements ()); >>> + vectors.quick_push (ref_vec); >>> + >>> + /* Use the ref_vec to filter others. 
*/ >>> + for (++it; it != v_info_map.end (); ++it) >>> + { >>> + tree vec = (*it).first; >>> + v_info_ptr info = (*it).second; >>> + if (TREE_TYPE (ref_vec) != TREE_TYPE (vec)) >>> + continue; >>> + if (ref_info->offsets.length () != info->offsets.length ()) >>> + continue; >>> + bool same_offset = true; >>> + info->offsets.qsort (unsigned_cmp); >>> + for (unsigned i = 0; i < ref_info->offsets.length (); i++) >>> + { >>> + if (ref_info->offsets[i] != info->offsets[i]) >>> + { >>> + same_offset = false; >>> + break; >>> + } >>> + } >>> + if (!same_offset) >>> + continue; >>> + vectors.quick_push (vec); >>> + } >>> + >>> + if (vectors.length () < 2) >>> + { >>> + cleanup_vinfo_map (v_info_map); >>> + return false; >>> + } >>> + >>> + tree tr; >>> + if (dump_file && (dump_flags & TDF_DETAILS)) >>> + { >>> + fprintf (dump_file, "The bit_field_ref vector list for undistribute: >>> "); >>> + FOR_EACH_VEC_ELT (vectors, i, tr) >>> + { >>> + print_generic_expr (dump_file, tr); >>> + fprintf (dump_file, " "); >>> + } >>> + fprintf (dump_file, "\n"); >>> + } >>> + >>> + /* Build the sum for all candidate VECTORs. 
*/ >>> + unsigned idx; >>> + gimple *sum = NULL; >>> + v_info_ptr info; >>> + tree sum_vec = ref_vec; >>> + FOR_EACH_VEC_ELT_FROM (vectors, i, tr, 1) >>> + { >>> + sum = build_and_add_sum (TREE_TYPE (ref_vec), sum_vec, tr, opcode); >>> + info = *(v_info_map.get (tr)); >>> + unsigned j; >>> + FOR_EACH_VEC_ELT (info->ops_indexes, j, idx) >>> + { >>> + gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op); >>> + gimple_set_visited (def, true); >>> + if (opcode == PLUS_EXPR || opcode == BIT_XOR_EXPR >>> + || opcode == BIT_IOR_EXPR) >>> + (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op)); >>> + else if (opcode == MULT_EXPR) >>> + (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op)); >>> + else >>> + { >>> + gcc_assert (opcode == BIT_AND_EXPR); >>> + (*ops)[idx]->op >>> + = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op)); >>> + } >>> + (*ops)[idx]->rank = 0; >>> + } >>> + sum_vec = gimple_get_lhs (sum); >>> + if (dump_file && (dump_flags & TDF_DETAILS)) >>> + { >>> + fprintf (dump_file, "Generating addition -> "); >>> + print_gimple_stmt (dump_file, sum, 0); >>> + } >>> + } >>> + >>> + /* Referring to any good shape VECTOR (here using ref_vec), generate the >>> + BIT_FIELD_REF statements accordingly. 
*/ >>> + info = *(v_info_map.get (ref_vec)); >>> + gcc_assert (sum); >>> + FOR_EACH_VEC_ELT (info->ops_indexes, i, idx) >>> + { >>> + tree dst = make_ssa_name (elem_type); >>> + gimple *gs >>> + = gimple_build_assign (dst, BIT_FIELD_REF, >>> + build3 (BIT_FIELD_REF, elem_type, sum_vec, >>> + TYPE_SIZE (elem_type), >>> + bitsize_int (info->offsets[i]))); >>> + insert_stmt_after (gs, sum); >>> + update_stmt (gs); >>> + gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op); >>> + gimple_set_visited (def, true); >>> + (*ops)[idx]->op = gimple_assign_lhs (gs); >>> + (*ops)[idx]->rank = get_rank ((*ops)[idx]->op); >>> + if (dump_file && (dump_flags & TDF_DETAILS)) >>> + { >>> + fprintf (dump_file, "Generating bit_field_ref -> "); >>> + print_gimple_stmt (dump_file, gs, 0); >>> + } >>> + } >>> + >>> + if (dump_file && (dump_flags & TDF_DETAILS)) >>> + { >>> + fprintf (dump_file, "undistributiong bit_field_ref for vector >>> done.\n"); >>> + } >>> + >>> + cleanup_vinfo_map (v_info_map); >>> + >>> + return true; >>> +} >>> + >>> /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison >>> expression, examine the other OPS to see if any of them are comparisons >>> of the same values, which we may be able to combine or eliminate. >>> @@ -5880,11 +6169,6 @@ reassociate_bb (basic_block bb) >>> tree lhs, rhs1, rhs2; >>> enum tree_code rhs_code = gimple_assign_rhs_code (stmt); >>> >>> - /* If this is not a gimple binary expression, there is >>> - nothing for us to do with it. */ >>> - if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS) >>> - continue; >>> - >>> /* If this was part of an already processed statement, >>> we don't need to touch it again. */ >>> if (gimple_visited_p (stmt)) >>> @@ -5911,6 +6195,11 @@ reassociate_bb (basic_block bb) >>> continue; >>> } >>> >>> + /* If this is not a gimple binary expression, there is >>> + nothing for us to do with it. 
*/ >>> + if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS) >>> + continue; >>> + >>> lhs = gimple_assign_lhs (stmt); >>> rhs1 = gimple_assign_rhs1 (stmt); >>> rhs2 = gimple_assign_rhs2 (stmt); >>> @@ -5950,6 +6239,13 @@ reassociate_bb (basic_block bb) >>> optimize_ops_list (rhs_code, &ops); >>> } >>> >>> + if (undistribute_bitref_for_vector (rhs_code, &ops, >>> + loop_containing_stmt (stmt))) >>> + { >>> + ops.qsort (sort_by_operand_rank); >>> + optimize_ops_list (rhs_code, &ops); >>> + } >>> + >>> if (rhs_code == PLUS_EXPR >>> && transform_add_to_multiply (&ops)) >>> ops.qsort (sort_by_operand_rank); >>> >