On Fri, May 13, 2016 at 5:53 PM, Richard Biener
<[email protected]> wrote:
> On May 13, 2016 6:02:27 PM GMT+02:00, Bin Cheng <[email protected]> wrote:
>>Hi,
>>As PR69848 reported, GCC vectorizer now generates comparison outside of
>>VEC_COND_EXPR for COND_REDUCTION case, as below:
>>
>> _20 = vect__1.6_8 != { 0, 0, 0, 0 };
>> vect_c_2.8_16 = VEC_COND_EXPR <_20, { 0, 0, 0, 0 }, vect_c_2.7_13>;
>> _21 = VEC_COND_EXPR <_20, ivtmp_17, _19>;
>>
>>This results in inefficient expanding. With IR like:
>>
>>vect_c_2.8_16 = VEC_COND_EXPR <vect__1.6_8 != { 0, 0, 0, 0 }, { 0, 0,
>>0, 0 }, vect_c_2.7_13>;
>> _21 = VEC_COND_EXPR <vect__1.6_8 != { 0, 0, 0, 0 }, ivtmp_17, _19>;
>>
>>We can do:
>>1) Expanding time optimization, for example, reverting comparison
>>operator by switching VEC_COND_EXPR operands. This is useful when
>>backend only supports some comparison operators.
>>2) For backend not supporting vcond_mask patterns, saving one LT_EXPR
>>instruction which introduced by expand_vec_cond_expr.
>>
>>This patch fixes this by propagating comparison into VEC_COND_EXPR even
>>if it's used multiple times. For now, GCC does single_use_only
>>propagation. Ideally, we may duplicate the comparison before each use
>>statement just before expanding, so that TER can successfully backtrack
>>it from each VEC_COND_EXPR. Unfortunately I didn't find a good pass to
>>do this. Tree-vect-generic.c looks like a good candidate, but it's so
>>early that following CSE could undo the transform. Another possible
>>fix is to generate comparison inside VEC_COND_EXPR directly in function
>>vectorizable_reduction.
>
> I prefer this for now.
Hi Richard, you mean this patch, or the possible fix before your comment?
Here is an updated patch addressing comment issue pointed out by
Bernhard Reutner-Fischer. Thanks.
Thanks,
bin
>
> Richard.
>
>>As for possible comparison CSE opportunities, I checked that it's
>>simple enough to be handled by RTL CSE.
>>
>>Bootstrap and test on x86_64 and AArch64. Any comments?
>>
>>Thanks,
>>bin
>>
>>2016-05-12 Bin Cheng <[email protected]>
>>
>> PR tree-optimization/69848
>> * optabs-tree.c (expand_vcond_mask_p, expand_vcond_p): New.
>> (expand_vec_cmp_expr_p): Call above functions.
>> * optabs-tree.h (expand_vcond_mask_p, expand_vcond_p): New.
>> * tree-ssa-forwprop.c (optabs-tree.h): Include header file.
>> (forward_propagate_into_cond): Propgate multiple uses for
>> VEC_COND_EXPR.
>
>
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c
index faac087..13538e5 100644
--- a/gcc/optabs-tree.c
+++ b/gcc/optabs-tree.c
@@ -313,26 +313,51 @@ expand_vec_cmp_expr_p (tree value_type, tree mask_type)
return (icode != CODE_FOR_nothing);
}
-/* Return TRUE iff, appropriate vector insns are available
- for vector cond expr with vector type VALUE_TYPE and a comparison
+/* Return TRUE iff appropriate vector insns are available
+ for VCOND_MASK pattern with vector type VALUE_TYPE and a comparison
with operand vector types in CMP_OP_TYPE. */
bool
-expand_vec_cond_expr_p (tree value_type, tree cmp_op_type)
+expand_vcond_mask_p (tree value_type, tree cmp_op_type)
{
- machine_mode value_mode = TYPE_MODE (value_type);
- machine_mode cmp_op_mode = TYPE_MODE (cmp_op_type);
if (VECTOR_BOOLEAN_TYPE_P (cmp_op_type)
&& get_vcond_mask_icode (TYPE_MODE (value_type),
TYPE_MODE (cmp_op_type)) != CODE_FOR_nothing)
return true;
- if (GET_MODE_SIZE (value_mode) != GET_MODE_SIZE (cmp_op_mode)
- || GET_MODE_NUNITS (value_mode) != GET_MODE_NUNITS (cmp_op_mode)
- || get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type),
- TYPE_UNSIGNED (cmp_op_type)) == CODE_FOR_nothing)
- return false;
- return true;
+ return false;
+}
+
+/* Return TRUE iff appropriate vector insns are available
+ for VCOND pattern with vector type VALUE_TYPE and a comparison
+ with operand vector types in CMP_OP_TYPE. */
+
+bool
+expand_vcond_p (tree value_type, tree cmp_op_type)
+{
+ machine_mode value_mode = TYPE_MODE (value_type);
+ machine_mode cmp_op_mode = TYPE_MODE (cmp_op_type);
+ if (GET_MODE_SIZE (value_mode) == GET_MODE_SIZE (cmp_op_mode)
+ && GET_MODE_NUNITS (value_mode) == GET_MODE_NUNITS (cmp_op_mode)
+ && get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type),
+ TYPE_UNSIGNED (cmp_op_type)) != CODE_FOR_nothing)
+ return true;
+
+ return false;
+}
+
+/* Return TRUE iff appropriate vector insns are available
+ for vector cond expr with vector type VALUE_TYPE and a comparison
+ with operand vector types in CMP_OP_TYPE. */
+
+bool
+expand_vec_cond_expr_p (tree value_type, tree cmp_op_type)
+{
+ if (expand_vcond_mask_p (value_type, cmp_op_type)
+ || expand_vcond_p (value_type, cmp_op_type))
+ return true;
+
+ return false;
}
/* Use the current target and options to initialize
diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h
index c3b9280..feab40f 100644
--- a/gcc/optabs-tree.h
+++ b/gcc/optabs-tree.h
@@ -39,6 +39,8 @@ optab optab_for_tree_code (enum tree_code, const_tree, enum
optab_subtype);
bool supportable_convert_operation (enum tree_code, tree, tree, tree *,
enum tree_code *);
bool expand_vec_cmp_expr_p (tree, tree);
+bool expand_vcond_mask_p (tree, tree);
+bool expand_vcond_p (tree, tree);
bool expand_vec_cond_expr_p (tree, tree);
void init_tree_optimization_optabs (tree);
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index c40f9e2..40f023f 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-pass.h"
#include "ssa.h"
#include "expmed.h"
+#include "optabs-tree.h"
#include "optabs-query.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
@@ -585,7 +586,10 @@ forward_propagate_into_cond (gimple_stmt_iterator *gsi_p)
{
enum tree_code def_code;
tree name = cond;
- gimple *def_stmt = get_prop_source_stmt (name, true, NULL);
+ bool single_use_only = (code != VEC_COND_EXPR
+ || !expand_vcond_p (gimple_expr_type (stmt),
+ TREE_TYPE (cond)));
+ gimple *def_stmt = get_prop_source_stmt (name, single_use_only, NULL);
if (!def_stmt || !can_propagate_from (def_stmt))
return 0;