Tamar Christina <tamar.christ...@arm.com> writes:
> Bootstrapped and regtested on aarch64-none-linux-gnu and x86_64-linux-gnu
> with no issues.
>
> gcc/ChangeLog:
>
> 	* tree-vect-stmts.c (prepare_load_store_mask): Rename to...
> 	(prepare_vec_mask): ...this and record operations that have
> 	already been masked.
> 	(vectorizable_call): Use it.
> 	(vectorizable_operation): Likewise.
> 	(vectorizable_store): Likewise.
> 	(vectorizable_load): Likewise.
> 	* tree-vectorizer.c (vec_cond_masked_key::get_cond_ops_from_tree): New.
> 	* tree-vectorizer.h (struct vec_cond_masked_key): New.
> 	(class _loop_vec_info): Add vec_cond_masked_set.
> 	(vec_cond_masked_set_type): New.
> 	(struct default_hash_traits<vec_cond_masked_key>): New.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/sve/pred-combine-and.c: New test.
>
> --- inline copy of patch ---
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..ee927346abe518caa3cba397b11dfd1ee7e93630
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +void f5(float * restrict z0, float * restrict z1, float *restrict x, float * restrict y, float c, int n)
> +{
> +    for (int i = 0; i < n; i++) {
> +        float a = x[i];
> +        float b = y[i];
> +        if (a > b) {
> +            z0[i] = a + b;
> +            if (a > c) {
> +                z1[i] = a - b;
> +            }
> +        }
> +    }
> +}
> +
> +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s} 2 } } */
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 2284ad069e4d521f4e0bd43d34181a258cd636ef..b1946b589043312a9b29d832f9b8398e24787a5f 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -1796,23 +1796,30 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
>  /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
>     form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
>     that needs to be applied to all loads and stores in a vectorized loop.
> -   Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
> +   Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
> +   otherwise return VEC_MASK & LOOP_MASK.
>
>     MASK_TYPE is the type of both masks.  If new statements are needed,
>     insert them before GSI.  */
>
>  static tree
> -prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
> -			 gimple_stmt_iterator *gsi)
> +prepare_vec_mask (tree mask_type, loop_vec_info loop_vinfo, tree loop_mask,
> +		  tree vec_mask, gimple_stmt_iterator *gsi)
Minor, but: loop_vinfo normally comes first when present.

>  {
>    gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
>    if (!loop_mask)
>      return vec_mask;
>
>    gcc_assert (TREE_TYPE (loop_mask) == mask_type);
> +
> +  vec_cond_masked_key cond (vec_mask, loop_mask);
> +  if (loop_vinfo->vec_cond_masked_set.contains (cond))
> +    return vec_mask;
> +
>    tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
>    gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
> 					  vec_mask, loop_mask);
> +
>    gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
>    return and_res;
>  }
> @@ -3526,8 +3533,9 @@ vectorizable_call (vec_info *vinfo,
> 	      gcc_assert (ncopies == 1);
> 	      tree mask = vect_get_loop_mask (gsi, masks, vec_num,
> 					      vectype_out, i);
> -	      vargs[mask_opno] = prepare_load_store_mask
> -		(TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
> +	      vargs[mask_opno] = prepare_vec_mask
> +		(TREE_TYPE (mask), loop_vinfo, mask,
> +		 vargs[mask_opno], gsi);
> 	    }
>
> 	  gcall *call;
> @@ -3564,8 +3572,8 @@ vectorizable_call (vec_info *vinfo,
> 	      tree mask = vect_get_loop_mask (gsi, masks, ncopies,
> 					      vectype_out, j);
> 	      vargs[mask_opno]
> -		= prepare_load_store_mask (TREE_TYPE (mask), mask,
> -					   vargs[mask_opno], gsi);
> +		= prepare_vec_mask (TREE_TYPE (mask), loop_vinfo, mask,
> +				    vargs[mask_opno], gsi);
> 	    }
>
> 	  gimple *new_stmt;
> @@ -6302,10 +6310,46 @@ vectorizable_operation (vec_info *vinfo,
> 	}
>        else
> 	{
> +	  tree mask = NULL_TREE;
> +	  /* When combining two masks, check if either of them is elsewhere
> +	     combined with a loop mask; if that's the case, we can mark that
> +	     the new combined mask doesn't need to be combined with a loop
> +	     mask.  */
> +	  if (masked_loop_p && code == BIT_AND_EXPR)
> +	    {
> +	      scalar_cond_masked_key cond0 (op0, ncopies);
> +	      if (loop_vinfo->scalar_cond_masked_set.contains (cond0))
> +		{
> +		  mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
> +					     vectype, i);
> +
> +		  vop0 = prepare_vec_mask (TREE_TYPE (mask), loop_vinfo, mask,
> +					   vop0, gsi);
> +		}
> +
> +	      scalar_cond_masked_key cond1 (op1, ncopies);
> +	      if (loop_vinfo->scalar_cond_masked_set.contains (cond1))
> +		{
> +		  mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
> +					     vectype, i);
> +
> +		  vop1 = prepare_vec_mask (TREE_TYPE (mask), loop_vinfo, mask,
> +					   vop1, gsi);
> +		}
> +	    }
> +
> 	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
> 	  new_temp = make_ssa_name (vec_dest, new_stmt);
> 	  gimple_assign_set_lhs (new_stmt, new_temp);
> 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
> +
> +	  /* Enter the combined value into the vector cond hash so we don't
> +	     AND it with a loop mask again.  */
> +	  if (mask)
> +	    {
> +	      vec_cond_masked_key cond (new_temp, mask);
> +	      loop_vinfo->vec_cond_masked_set.add (cond);
> +	    }
> +
> 	  if (vec_cvt_dest)
> 	    {
> 	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
> @@ -8166,8 +8210,8 @@ vectorizable_store (vec_info *vinfo,
> 	  final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
> 					   vectype, j);
> 	  if (vec_mask)
> -	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
> -						  vec_mask, gsi);
> +	    final_mask = prepare_vec_mask (mask_vectype, loop_vinfo,
> +					   final_mask, vec_mask, gsi);
>
> 	  gcall *call;
> 	  if (final_mask)
> @@ -8221,8 +8265,8 @@ vectorizable_store (vec_info *vinfo,
> 					     vec_num * ncopies,
> 					     vectype, vec_num * j + i);
> 	      if (vec_mask)
> -		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
> -						      vec_mask, gsi);
> +		final_mask = prepare_vec_mask (mask_vectype, loop_vinfo,
> +					       final_mask, vec_mask, gsi);
>
> 	      if (memory_access_type == VMAT_GATHER_SCATTER)
> 		{
> @@ -9442,8 +9486,8 @@ vectorizable_load (vec_info *vinfo,
> 	  final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
> 					   vectype, j);
> 	  if (vec_mask)
> -	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
> -						  vec_mask, gsi);
> +	    final_mask = prepare_vec_mask (mask_vectype, loop_vinfo,
> +					   final_mask, vec_mask, gsi);
>
> 	  gcall *call;
> 	  if (final_mask)
> @@ -9494,8 +9538,8 @@ vectorizable_load (vec_info *vinfo,
> 					     vec_num * ncopies,
> 					     vectype, vec_num * j + i);
> 	      if (vec_mask)
> -		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
> -						      vec_mask, gsi);
> +		final_mask = prepare_vec_mask (mask_vectype, loop_vinfo,
> +					       final_mask, vec_mask, gsi);
>
> 	      if (i > 0)
> 		dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index d466be6ffcb7b2f7724332367cce2eee75245240..894f30b35fa5491e31c58ecb8a46717ef3598a9a 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -327,6 +327,80 @@ struct default_hash_traits<scalar_cond_masked_key>
>
>  typedef hash_set<scalar_cond_masked_key> scalar_cond_masked_set_type;
>
> +/* Key for the set that records the association between vector conditions
> +   and the corresponding loop mask, populated by prepare_vec_mask.  */
> +
> +struct vec_cond_masked_key
> +{
> +  vec_cond_masked_key (tree t, tree loop_mask_)
> +    : loop_mask (loop_mask_)
> +  {
> +    get_cond_ops_from_tree (t);
> +  }
> +
> +  void get_cond_ops_from_tree (tree);
> +
> +  tree loop_mask;
> +  tree_code code;
> +  tree op0;
> +  tree op1;
> +};
> +
> +template<>
> +struct default_hash_traits<vec_cond_masked_key>
> +{
> +  typedef vec_cond_masked_key compare_type;
> +  typedef vec_cond_masked_key value_type;
> +
> +  static inline hashval_t
> +  hash (value_type v)
> +  {
> +    inchash::hash h;
> +    h.add_int (v.code);
> +    inchash::add_expr (v.op0, h, 0);
> +    inchash::add_expr (v.op1, h, 0);
> +    h.add_ptr (v.loop_mask);
> +    return h.end ();
> +  }
> +
> +  static inline bool
> +  equal (value_type existing, value_type candidate)
> +  {
> +    return (existing.code == candidate.code
> +	    && existing.loop_mask == candidate.loop_mask
> +	    && operand_equal_p (existing.op0, candidate.op0, 0)
> +	    && operand_equal_p (existing.op1, candidate.op1, 0));
> +  }
> +
> +  static const bool empty_zero_p = true;
> +
> +  static inline void
> +  mark_empty (value_type &v)
> +  {
> +    v.loop_mask = NULL_TREE;
> +  }
> +
> +  static inline bool
> +  is_empty (value_type v)
> +  {
> +    return v.loop_mask == NULL_TREE;
> +  }
> +
> +  static inline void mark_deleted (value_type &) {}
> +
> +  static inline bool is_deleted (const value_type &)
> +  {
> +    return false;
> +  }
> +
> +  static inline void remove (value_type &) {}
> +};
> +
> +typedef hash_set<vec_cond_masked_key> vec_cond_masked_set_type;
> +
> +
>  /* Describes two objects whose addresses must be unequal for the vectorized
>     loop to be valid.  */
>  typedef std::pair<tree, tree> vec_object_pair;
> @@ -643,6 +717,9 @@ public:
>    /* Set of scalar conditions that have loop mask applied.  */
>    scalar_cond_masked_set_type scalar_cond_masked_set;
>
> +  /* Set of vector conditions that have loop mask applied.  */
> +  vec_cond_masked_set_type vec_cond_masked_set;
> +
>    /* If we are using a loop mask to align memory addresses, this variable
>       contains the number of vector elements that we should skip in the
>       first iteration of the vector loop (i.e. the number of leading
> diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
> index 5c93961869db08e5ef4e2d22deffdaba8554d78f..4e4d3ca49b46dc051c9084372e8501457e23c632 100644
> --- a/gcc/tree-vectorizer.c
> +++ b/gcc/tree-vectorizer.c
> @@ -1720,6 +1720,40 @@ scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
>    this->inverted_p = false;
>  }
>
> +/* If the condition represented by T is a comparison or the SSA name
> +   result of a comparison, extract the comparison's operands.  Represent
> +   T as NE_EXPR <T, 0> otherwise.  */
> +
> +void
> +vec_cond_masked_key::get_cond_ops_from_tree (tree t)
> +{
> +  if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
> +    {
> +      this->code = TREE_CODE (t);
> +      this->op0 = TREE_OPERAND (t, 0);
> +      this->op1 = TREE_OPERAND (t, 1);
> +      return;
> +    }
> +
> +  if (TREE_CODE (t) == SSA_NAME)
> +    if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
> +      {
> +	tree_code code = gimple_assign_rhs_code (stmt);
> +	if (TREE_CODE_CLASS (code) == tcc_comparison)
> +	  {
> +	    this->code = code;
> +	    this->op0 = gimple_assign_rhs1 (stmt);
> +	    this->op1 = gimple_assign_rhs2 (stmt);
> +	    return;
> +	  }
> +      }
> +
> +  this->code = NE_EXPR;
> +  this->op0 = t;
> +  this->op1 = build_zero_cst (TREE_TYPE (t));
> +}
> +

This hashing looks unnecessarily complex.
The values we're hashing are vector SSA_NAMEs, so I think we should be
able to hash and compare them as a plain pair of pointers.  The type
could then be std::pair and the hashing could be done using pair_hash
from hash-traits.h.

Thanks,
Richard
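
P.S. For concreteness, a minimal sketch of the pair_hash idea — untested,
and the name tree_cond_mask_hash and the choice of nofree_ptr_hash are
only illustrative assumptions, not something the patch pins down:

  /* In tree-vectorizer.h: hash and compare (vec_mask, loop_mask) pairs
     of SSA_NAMEs by pointer identity.  pair_hash and nofree_ptr_hash
     both come from hash-traits.h.  */
  typedef pair_hash <nofree_ptr_hash <tree_node>,
		     nofree_ptr_hash <tree_node> > tree_cond_mask_hash;
  typedef hash_set <tree_cond_mask_hash> vec_cond_masked_set_type;

prepare_vec_mask could then look the pair up directly, with no need for
vec_cond_masked_key or its default_hash_traits specialisation:

  /* Reuse an existing AND if we've already combined this exact
     VEC_MASK with this exact LOOP_MASK.  */
  std::pair<tree, tree> cond (vec_mask, loop_mask);
  if (loop_vinfo->vec_cond_masked_set.contains (cond))
    return vec_mask;

and vectorizable_operation would record the combined result with
something like:

  loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });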