On Fri, May 13, 2016 at 5:53 PM, Richard Biener <richard.guent...@gmail.com> wrote: > On May 13, 2016 6:02:27 PM GMT+02:00, Bin Cheng <bin.ch...@arm.com> wrote: >>Hi, >>As PR69848 reported, GCC vectorizer now generates comparison outside of >>VEC_COND_EXPR for COND_REDUCTION case, as below: >> >> _20 = vect__1.6_8 != { 0, 0, 0, 0 }; >> vect_c_2.8_16 = VEC_COND_EXPR <_20, { 0, 0, 0, 0 }, vect_c_2.7_13>; >> _21 = VEC_COND_EXPR <_20, ivtmp_17, _19>; >> >>This results in inefficient expanding. With IR like: >> >>vect_c_2.8_16 = VEC_COND_EXPR <vect__1.6_8 != { 0, 0, 0, 0 }, { 0, 0, >>0, 0 }, vect_c_2.7_13>; >> _21 = VEC_COND_EXPR <vect__1.6_8 != { 0, 0, 0, 0 }, ivtmp_17, _19>; >> >>We can do: >>1) Expanding time optimization, for example, reverting comparison >>operator by switching VEC_COND_EXPR operands. This is useful when >>backend only supports some comparison operators. >>2) For backend not supporting vcond_mask patterns, saving one LT_EXPR >>instruction which introduced by expand_vec_cond_expr. >> >>This patch fixes this by propagating comparison into VEC_COND_EXPR even >>if it's used multiple times. For now, GCC does single_use_only >>propagation. Ideally, we may duplicate the comparison before each use >>statement just before expanding, so that TER can successfully backtrack >>it from each VEC_COND_EXPR. Unfortunately I didn't find a good pass to >>do this. Tree-vect-generic.c looks like a good candidate, but it's so >>early that following CSE could undo the transform. Another possible >>fix is to generate comparison inside VEC_COND_EXPR directly in function >>vectorizable_reduction. > > I prefer this for now. Hi Richard, you mean this patch, or the possible fix before your comment? Here is an updated patch addressing comment issue pointed out by Bernhard Reutner-Fischer. Thanks.
Thanks, bin > > Richard. > >>As for possible comparison CSE opportunities, I checked that it's >>simple enough to be handled by RTL CSE. >> >>Bootstrap and test on x86_64 and AArch64. Any comments? >> >>Thanks, >>bin >> >>2016-05-12 Bin Cheng <bin.ch...@arm.com> >> >> PR tree-optimization/69848 >> * optabs-tree.c (expand_vcond_mask_p, expand_vcond_p): New. >> (expand_vec_cmp_expr_p): Call above functions. >> * optabs-tree.h (expand_vcond_mask_p, expand_vcond_p): New. >> * tree-ssa-forwprop.c (optabs-tree.h): Include header file. >> (forward_propagate_into_cond): Propgate multiple uses for >> VEC_COND_EXPR. > >
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c index faac087..13538e5 100644 --- a/gcc/optabs-tree.c +++ b/gcc/optabs-tree.c @@ -313,26 +313,51 @@ expand_vec_cmp_expr_p (tree value_type, tree mask_type) return (icode != CODE_FOR_nothing); } -/* Return TRUE iff, appropriate vector insns are available - for vector cond expr with vector type VALUE_TYPE and a comparison +/* Return TRUE iff appropriate vector insns are available + for VCOND_MASK pattern with vector type VALUE_TYPE and a comparison with operand vector types in CMP_OP_TYPE. */ bool -expand_vec_cond_expr_p (tree value_type, tree cmp_op_type) +expand_vcond_mask_p (tree value_type, tree cmp_op_type) { - machine_mode value_mode = TYPE_MODE (value_type); - machine_mode cmp_op_mode = TYPE_MODE (cmp_op_type); if (VECTOR_BOOLEAN_TYPE_P (cmp_op_type) && get_vcond_mask_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type)) != CODE_FOR_nothing) return true; - if (GET_MODE_SIZE (value_mode) != GET_MODE_SIZE (cmp_op_mode) - || GET_MODE_NUNITS (value_mode) != GET_MODE_NUNITS (cmp_op_mode) - || get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type), - TYPE_UNSIGNED (cmp_op_type)) == CODE_FOR_nothing) - return false; - return true; + return false; +} + +/* Return TRUE iff appropriate vector insns are available + for VCOND pattern with vector type VALUE_TYPE and a comparison + with operand vector types in CMP_OP_TYPE. */ + +bool +expand_vcond_p (tree value_type, tree cmp_op_type) +{ + machine_mode value_mode = TYPE_MODE (value_type); + machine_mode cmp_op_mode = TYPE_MODE (cmp_op_type); + if (GET_MODE_SIZE (value_mode) == GET_MODE_SIZE (cmp_op_mode) + && GET_MODE_NUNITS (value_mode) == GET_MODE_NUNITS (cmp_op_mode) + && get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type), + TYPE_UNSIGNED (cmp_op_type)) != CODE_FOR_nothing) + return true; + + return false; +} + +/* Return TRUE iff appropriate vector insns are available + for vector cond expr with vector type VALUE_TYPE and a comparison + with operand vector types in CMP_OP_TYPE. */ + +bool +expand_vec_cond_expr_p (tree value_type, tree cmp_op_type) +{ + if (expand_vcond_mask_p (value_type, cmp_op_type) + || expand_vcond_p (value_type, cmp_op_type)) + return true; + + return false; } /* Use the current target and options to initialize diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h index c3b9280..feab40f 100644 --- a/gcc/optabs-tree.h +++ b/gcc/optabs-tree.h @@ -39,6 +39,8 @@ optab optab_for_tree_code (enum tree_code, const_tree, enum optab_subtype); bool supportable_convert_operation (enum tree_code, tree, tree, tree *, enum tree_code *); bool expand_vec_cmp_expr_p (tree, tree); +bool expand_vcond_mask_p (tree, tree); +bool expand_vcond_p (tree, tree); bool expand_vec_cond_expr_p (tree, tree); void init_tree_optimization_optabs (tree); diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c index c40f9e2..40f023f 100644 --- a/gcc/tree-ssa-forwprop.c +++ b/gcc/tree-ssa-forwprop.c @@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-pass.h" #include "ssa.h" #include "expmed.h" +#include "optabs-tree.h" #include "optabs-query.h" #include "gimple-pretty-print.h" #include "fold-const.h" @@ -585,7 +586,10 @@ forward_propagate_into_cond (gimple_stmt_iterator *gsi_p) { enum tree_code def_code; tree name = cond; - gimple *def_stmt = get_prop_source_stmt (name, true, NULL); + bool single_use_only = (code != VEC_COND_EXPR + || !expand_vcond_p (gimple_expr_type (stmt), + TREE_TYPE (cond))); + gimple *def_stmt = get_prop_source_stmt (name, single_use_only, NULL); if (!def_stmt || !can_propagate_from (def_stmt)) return 0;