I was working some aspects of our port's vector support and stumbled
across a bit of silly code. We were comparing two vectors that were
both uniform.
We can simplify a comparison of uniform vectors to a comparison of a
representative element from each. That in turn may expose const/copy
propagation opportunities or even jump threading opportunities.
And that's precisely what this patch does inside DOM. At the start of
statement processing we see if we can reduce a vector comparison to a
scalar comparison.
You can see this in pr95308.C on x86. As we enter dom3 we have:
;; basic block 2, loop depth 0
;; pred: ENTRY
e.1_1 = e;
_27 = f_26(D) != 0;
_163 = e.1_1 != 0;
_164 = _27 & _163;
_196 = _164 ? -1 : 0;
vect_cst__194 = {_196, _196, _196, _196, _196, _196, _196, _196};
[ ... ]
;; basic block 8, loop depth 1
;; pred: 7
;; 6
if (vect_cst__194 == { 0, 0, 0, 0, 0, 0, 0, 0 })
goto <bb 10>; [100.00%]
else
goto <bb 9>; [20.00%]
;; succ: 10
;; 9
There's another similar comparison later. We transform the comparison into:
;; basic block 8, loop depth 1
;; pred: 7
;; 6
if (_196 == 0)
goto <bb 13>; [100.00%]
else
goto <bb 9>; [20.00%]
;; succ: 13
;; 9
There's nice secondary effects that show up after the transformation as
well.
Bootstrapped and regression tested on x86_64. It'll go through the
rest of the public targets as well as our internal port over the next
24-48 hrs.
Committed to the trunk,
Jeff
commit ac6d5c9112bb78e47398e040c553ad216edf3ebb
Author: Jeff Law <jlaw@localhost.localdomain>
Date: Fri Aug 27 15:27:38 2021 -0400
Reduce vector comparison of uniform vectors to a scalar comparison
gcc/
* tree-ssa-dom.c (reduce_vector_comparison_to_scalar_comparison):
New
function.
(dom_opt_dom_walker::optimize_stmt): Use it.
diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c
index a0df492df10..a5245b33de6 100644
--- a/gcc/tree-ssa-dom.c
+++ b/gcc/tree-ssa-dom.c
@@ -1891,6 +1891,66 @@ dom_opt_dom_walker::test_for_singularity (gimple *stmt,
}
}
+/* If STMT is a comparison of two uniform vectors reduce it to a comparison
+ of scalar objects, otherwise leave STMT unchanged. */
+
+static void
+reduce_vector_comparison_to_scalar_comparison (gimple *stmt)
+{
+ if (gimple_code (stmt) == GIMPLE_COND)
+ {
+ tree lhs = gimple_cond_lhs (stmt);
+ tree rhs = gimple_cond_rhs (stmt);
+
+ /* We may have a vector comparison where both arms are uniform
+ vectors. If so, we can simplify the vector comparison down
+ to a scalar comparison. */
+ if (TREE_CODE (TREE_TYPE (lhs)) == VECTOR_TYPE
+ && TREE_CODE (TREE_TYPE (rhs)) == VECTOR_TYPE)
+ {
+ /* If either operand is an SSA_NAME, then look back to its
+ defining statement to try and get at a suitable source. */
+ if (TREE_CODE (rhs) == SSA_NAME)
+ {
+ gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
+ if (gimple_assign_single_p (def_stmt))
+ rhs = gimple_assign_rhs1 (def_stmt);
+ }
+
+ if (TREE_CODE (lhs) == SSA_NAME)
+ {
+ gimple *def_stmt = SSA_NAME_DEF_STMT (lhs);
+ if (gimple_assign_single_p (def_stmt))
+ lhs = gimple_assign_rhs1 (def_stmt);
+ }
+
+ /* Now see if they are both uniform vectors and if so replace
+ the vector comparison with a scalar comparison. */
+ tree rhs_elem = rhs ? uniform_vector_p (rhs) : NULL_TREE;
+ tree lhs_elem = lhs ? uniform_vector_p (lhs) : NULL_TREE;
+ if (rhs_elem && lhs_elem)
+ {
+ if (dump_file && dump_flags & TDF_DETAILS)
+ {
+ fprintf (dump_file, "Reducing vector comparison: ");
+ print_gimple_stmt (dump_file, stmt, 0);
+ }
+
+ gimple_cond_set_rhs (as_a <gcond *>(stmt), rhs_elem);
+ gimple_cond_set_lhs (as_a <gcond *>(stmt), lhs_elem);
+ gimple_set_modified (stmt, true);
+
+ if (dump_file && dump_flags & TDF_DETAILS)
+ {
+ fprintf (dump_file, "To scalar equivalent: ");
+ print_gimple_stmt (dump_file, stmt, 0);
+ fprintf (dump_file, "\n");
+ }
+ }
+ }
+ }
+}
+
/* Optimize the statement in block BB pointed to by iterator SI.
We try to perform some simplistic global redundancy elimination and
@@ -1933,6 +1993,11 @@ dom_opt_dom_walker::optimize_stmt (basic_block bb,
gimple_stmt_iterator *si,
update_stmt_if_modified (stmt);
opt_stats.num_stmts++;
+ /* STMT may be a comparison of uniform vectors that we can simplify
+ down to a comparison of scalars. Do that transformation first
+ so that all the scalar optimizations from here onward apply. */
+ reduce_vector_comparison_to_scalar_comparison (stmt);
+
/* Const/copy propagate into USES, VUSES and the RHS of VDEFs. */
cprop_into_stmt (stmt, m_evrp_range_analyzer);