> Jeff Law wrote:
> Can you move pr45685.c into gcc.target/i386?
>
> I know Richi said next stage1, but given this fixes a performance
> regression for ARM and it's reverting rather than adding new code, I
> think this is OK for the trunk with the testcase moved.
>
> So, OK with the testcase moved into gcc.target/i386/
I've moved it and changed the compile condition:
/* { dg-do compile { target { ! { ia32 } } } } */
Jiong, can you commit this please?
Wilco
2015-03-06 Wilco Dijkstra <wdijk...@arm.com>
* gcc/tree-ssa-phiopt.c (neg_replacement): Remove.
(tree_ssa_phiopt_worker): Remove negate optimization.
* gcc/testsuite/gcc.dg/tree-ssa/pr45685.c: Move to gcc.target/i386.
* gcc/testsuite/gcc.target/aarch64/csneg-1.c (test_csneg_cmp):
New test.
* gcc/gcc.target/i386/pr45685.c: Moved test, check for conditional
move on x64.
---
gcc/testsuite/gcc.dg/tree-ssa/pr45685.c | 41 --------
gcc/testsuite/gcc.target/aarch64/csneg-1.c | 8 ++
gcc/testsuite/gcc.target/i386/pr45685.c | 39 ++++++++
gcc/tree-ssa-phiopt.c | 156 -----------------------------
4 files changed, 47 insertions(+), 197 deletions(-)
delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr45685.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr45685.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c
deleted file mode 100644
index 0628943..0000000
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O3 -fdump-tree-phiopt1-details" } */
-
-typedef unsigned long int uint64_t;
-typedef long int int64_t;
-int summation_helper_1(int64_t* products, uint64_t count)
-{
- int s = 0;
- uint64_t i;
- for(i=0; i<count; i++)
- {
- int64_t val = (products[i]>0) ? 1 : -1;
- products[i] *= val;
- if(products[i] != i)
- val = -val;
- products[i] = val;
- s += val;
- }
- return s;
-}
-
-
-int summation_helper_2(int64_t* products, uint64_t count)
-{
- int s = 0;
- uint64_t i;
- for(i=0; i<count; i++)
- {
- int val = (products[i]>0) ? 1 : -1;
- products[i] *= val;
- if(products[i] != i)
- val = -val;
- products[i] = val;
- s += val;
- }
- return s;
-}
-
-/* { dg-final { scan-tree-dump-times "converted to straightline code" 2
"phiopt1" } } */
-/* { dg-final { cleanup-tree-dump "phiopt1" } } */
-
diff --git a/gcc/testsuite/gcc.target/aarch64/csneg-1.c
b/gcc/testsuite/gcc.target/aarch64/csneg-1.c
index 08001af..29d4e4e 100644
--- a/gcc/testsuite/gcc.target/aarch64/csneg-1.c
+++ b/gcc/testsuite/gcc.target/aarch64/csneg-1.c
@@ -48,3 +48,11 @@ test_csneg64_condasn2(long long x0,
x4 = (x0 == x1) ? x3 : -x2;
return x4;
}
+
+int test_csneg_cmp(int x)
+{
+ /* { dg-final { scan-assembler "csneg\tw\[0-9\]" } } */
+ if (x > 3)
+ x = -x;
+ return x;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr45685.c
b/gcc/testsuite/gcc.target/i386/pr45685.c
new file mode 100644
index 0000000..7f50bb3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr45685.c
@@ -0,0 +1,39 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-O3" } */
+
+typedef unsigned long int uint64_t;
+typedef long int int64_t;
+int summation_helper_1(int64_t* products, uint64_t count)
+{
+ int s = 0;
+ uint64_t i;
+ for(i=0; i<count; i++)
+ {
+ int64_t val = (products[i]>0) ? 1 : -1;
+ products[i] *= val;
+ if(products[i] != i)
+ val = -val;
+ products[i] = val;
+ s += val;
+ }
+ return s;
+}
+
+
+int summation_helper_2(int64_t* products, uint64_t count)
+{
+ int s = 0;
+ uint64_t i;
+ for(i=0; i<count; i++)
+ {
+ int val = (products[i]>0) ? 1 : -1;
+ products[i] *= val;
+ if(products[i] != i)
+ val = -val;
+ products[i] = val;
+ s += val;
+ }
+ return s;
+}
+
+/* { dg-final { scan-assembler-times "cmov" 4 } } */
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index c7fb073..14a7122 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -96,8 +96,6 @@ static bool minmax_replacement (basic_block, basic_block,
edge, edge, gimple, tree, tree);
static bool abs_replacement (basic_block, basic_block,
edge, edge, gimple, tree, tree);
-static bool neg_replacement (basic_block, basic_block,
- edge, edge, gimple, tree, tree);
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
hash_set<tree> *);
static bool cond_if_else_store_replacement (basic_block, basic_block,
basic_block);
@@ -209,23 +207,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool
do_hoist_loads)
/* Calculate the set of non-trapping memory accesses. */
nontrap = get_non_trapping ();
- /* The replacement of conditional negation with a non-branching
- sequence is really only a win when optimizing for speed and we
- can avoid transformations by gimple if-conversion that result
- in poor RTL generation.
-
- Ideally either gimple if-conversion or the RTL expanders will
- be improved and the code to emit branchless conditional negation
- can be removed. */
- bool replace_conditional_negation = false;
- if (!do_store_elim)
- replace_conditional_negation
- = ((!optimize_size && optimize >= 2)
- || (((flag_tree_loop_vectorize || cfun->has_force_vectorize_loops)
- && flag_tree_loop_if_convert != 0)
- || flag_tree_loop_if_convert == 1
- || flag_tree_loop_if_convert_stores == 1));
-
/* Search every basic block for COND_EXPR we may be able to optimize.
We walk the blocks in order that guarantees that a block with
@@ -380,9 +361,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool
do_hoist_loads)
cfgchanged = true;
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
- else if (replace_conditional_negation
- && neg_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
- cfgchanged = true;
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
}
@@ -1319,140 +1297,6 @@ abs_replacement (basic_block cond_bb, basic_block
middle_bb,
return true;
}
-/* The function neg_replacement replaces conditional negation with
- equivalent straight line code. Returns TRUE if replacement is done,
- otherwise returns FALSE.
-
- COND_BB branches around negation occuring in MIDDLE_BB.
-
- E0 and E1 are edges out of COND_BB. E0 reaches MIDDLE_BB and
- E1 reaches the other successor which should contain PHI with
- arguments ARG0 and ARG1.
-
- Assuming negation is to occur when the condition is true,
- then the non-branching sequence is:
-
- result = (rhs ^ -cond) + cond
-
- Inverting the condition or its result gives us negation
- when the original condition is false. */
-
-static bool
-neg_replacement (basic_block cond_bb, basic_block middle_bb,
- edge e0 ATTRIBUTE_UNUSED, edge e1,
- gimple phi, tree arg0, tree arg1)
-{
- gimple new_stmt, cond;
- gimple_stmt_iterator gsi;
- gimple assign;
- edge true_edge, false_edge;
- tree rhs, lhs;
- enum tree_code cond_code;
- bool invert = false;
-
- /* This transformation performs logical operations on the
- incoming arguments. So force them to be integral types. */
- if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)))
- return false;
-
- /* OTHER_BLOCK must have only one executable statement which must have the
- form arg0 = -arg1 or arg1 = -arg0. */
-
- assign = last_and_only_stmt (middle_bb);
- /* If we did not find the proper negation assignment, then we can not
- optimize. */
- if (assign == NULL)
- return false;
-
- /* If we got here, then we have found the only executable statement
- in OTHER_BLOCK. If it is anything other than arg0 = -arg1 or
- arg1 = -arg0, then we can not optimize. */
- if (gimple_code (assign) != GIMPLE_ASSIGN)
- return false;
-
- lhs = gimple_assign_lhs (assign);
-
- if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
- return false;
-
- rhs = gimple_assign_rhs1 (assign);
-
- /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */
- if (!(lhs == arg0 && rhs == arg1)
- && !(lhs == arg1 && rhs == arg0))
- return false;
-
- /* The basic sequence assumes we negate when the condition is true.
- If we need the opposite, then we will either need to invert the
- condition or its result. */
- extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
- invert = false_edge->dest == middle_bb;
-
- /* Unlike abs_replacement, we can handle arbitrary conditionals here. */
- cond = last_stmt (cond_bb);
- cond_code = gimple_cond_code (cond);
-
- /* If inversion is needed, first try to invert the test since
- that's cheapest. */
- if (invert)
- {
- bool honor_nans = HONOR_NANS (gimple_cond_lhs (cond));
- enum tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
-
- /* If invert_tree_comparison was successful, then use its return
- value as the new code and note that inversion is no longer
- needed. */
- if (new_code != ERROR_MARK)
- {
- cond_code = new_code;
- invert = false;
- }
- }
-
- tree cond_val = make_ssa_name (boolean_type_node);
- new_stmt = gimple_build_assign (cond_val, cond_code,
- gimple_cond_lhs (cond),
- gimple_cond_rhs (cond));
- gsi = gsi_last_bb (cond_bb);
- gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
-
- /* If we still need inversion, then invert the result of the
- condition. */
- if (invert)
- {
- tree tmp = make_ssa_name (boolean_type_node);
- new_stmt = gimple_build_assign (tmp, BIT_XOR_EXPR, cond_val,
- boolean_true_node);
- gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
- cond_val = tmp;
- }
-
- /* Get the condition in the right type so that we can perform
- logical and arithmetic operations on it. */
- tree cond_val_converted = make_ssa_name (TREE_TYPE (rhs));
- new_stmt = gimple_build_assign (cond_val_converted, NOP_EXPR, cond_val);
- gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-
- tree neg_cond_val_converted = make_ssa_name (TREE_TYPE (rhs));
- new_stmt = gimple_build_assign (neg_cond_val_converted, NEGATE_EXPR,
- cond_val_converted);
- gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-
- tree tmp = make_ssa_name (TREE_TYPE (rhs));
- new_stmt = gimple_build_assign (tmp, BIT_XOR_EXPR, rhs,
- neg_cond_val_converted);
- gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-
- tree new_lhs = make_ssa_name (TREE_TYPE (rhs));
- new_stmt = gimple_build_assign (new_lhs, PLUS_EXPR, tmp, cond_val_converted);
- gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
-
- replace_phi_edge_with_variable (cond_bb, e1, phi, new_lhs);
-
- /* Note that we optimized this PHI. */
- return true;
-}
-
/* Auxiliary functions to determine the set of memory accesses which
can't trap because they are preceded by accesses to the same memory
portion. We do that for MEM_REFs, so we only need to track
--
1.9.1