Hi, thanks for the feedback! I have sent a new version, keeping the match.pd patterns, fixing the formatting issues and changing std::set to hash_set (https://gcc.gnu.org/pipermail/gcc-patches/2025-March/677526.html).
Konstantinos On Mon, Mar 10, 2025 at 6:49 PM Andrew Pinski <pins...@gmail.com> wrote: > > On Mon, Mar 10, 2025 at 7:52 AM Konstantinos Eleftheriou > <konstantinos.elefther...@vrull.eu> wrote: > > > > Testcases for patterns `((a ^ b) & c) cmp d | a != b -> (0 cmp d | a != b)` > > and `(a ^ b) cmp c | a != b -> (0 cmp c | a != b)` were failing on some > > targets, like PowerPC. > > > > This patch moves the optimization to reassoc. Doing so, we can now handle > > cases where the related conditions appear in an AND expression too. Also, > > we can optimize cases where we have intermediate expressions between the > > related ones in the AND/OR expression on some targets. This is not handled > > on targets like PowerPC, where each condition of the AND/OR expression > > is placed into a different basic block. > > > > Bootstrapped/regtested on x86 and AArch64. > > > > PR tree-optimization/116860 > > > > gcc/ChangeLog: > > > > * match.pd: Remove the following patterns: > > ((a ^ b) & c) cmp d | a != b -> (0 cmp d | a != b) > > (a ^ b) cmp c | a != b -> (0 cmp c | a != b) > > I suspect removing them from match is wrong. > > > * tree-ssa-reassoc.cc (INCLUDE_SET): Include <set> for std::set. > > (INCLUDE_ALGORITHM): Include <algorithm> for std::set_intersection. > > (solve_expr): New function. > > (find_terminal_nodes): New function. > > (get_terminal_nodes): New function. > > (optimize_cmp_xor_exprs): New function. > > (optimize_range_tests): Call optimize_cmp_xor_exprs. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.dg/tree-ssa/fold-xor-and-or.c: Renamed to fold-xor-and-or-1.c. > > * gcc.dg/tree-ssa/fold-xor-and-or-1.c: > > Add new test cases, remove logical-op-non-short-circuit=1. > > * gcc.dg/tree-ssa/fold-xor-or.c: Likewise. > > * gcc.dg/tree-ssa/fold-xor-and-or-2.c: New test. > > --- > > gcc/match.pd | 30 -- > > ...{fold-xor-and-or.c => fold-xor-and-or-1.c} | 42 ++- > > .../gcc.dg/tree-ssa/fold-xor-and-or-2.c | 55 +++ > > gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c | 42 ++- > > gcc/tree-ssa-reassoc.cc | 344 ++++++++++++++++++ > > 5 files changed, 465 insertions(+), 48 deletions(-) > > rename gcc/testsuite/gcc.dg/tree-ssa/{fold-xor-and-or.c => > > fold-xor-and-or-1.c} (50%) > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c > > > > diff --git a/gcc/match.pd b/gcc/match.pd > > index 5c679848bdf..b78ee6eaf4c 100644 > > --- a/gcc/match.pd > > +++ b/gcc/match.pd > > @@ -3871,36 +3871,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > > (if (types_match (type, TREE_TYPE (@0))) > > (bit_xor @0 { build_one_cst (type); } )))))) > > > > -/* ((a ^ b) & c) cmp d || a != b --> (0 cmp d || a != b). */ > > -(for cmp (simple_comparison) > > - (simplify > > - (bit_ior > > - (cmp:c > > - (bit_and:c > > - (bit_xor:c @0 @1) > > - tree_expr_nonzero_p@2) > > - @3) > > - (ne@4 @0 @1)) > > - (bit_ior > > - (cmp > > - { build_zero_cst (TREE_TYPE (@0)); } > > - @3) > > - @4))) > > - > > -/* (a ^ b) cmp c || a != b --> (0 cmp c || a != b). */ > > -(for cmp (simple_comparison) > > - (simplify > > - (bit_ior > > - (cmp:c > > - (bit_xor:c @0 @1) > > - @2) > > - (ne@3 @0 @1)) > > - (bit_ior > > - (cmp > > - { build_zero_cst (TREE_TYPE (@0)); } > > - @2) > > - @3))) > > - > > /* We can't reassociate at all for saturating types. */ > > (if (!TYPE_SATURATING (type)) > > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c > > b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-1.c > > similarity index 50% > > rename from gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c > > rename to gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-1.c > > index 99e83d8e5aa..23edf9f4342 100644 > > --- a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-1.c > > @@ -1,55 +1,79 @@ > > /* { dg-do compile } */ > > -/* { dg-options "-O3 -fdump-tree-optimized --param > > logical-op-non-short-circuit=1" } */ > > +/* { dg-options "-O3 -fdump-tree-optimized" } */ > > > > typedef unsigned long int uint64_t; > > > > -int cmp1(int d1, int d2) { > > +int cmp1_or(int d1, int d2) { > > if (((d1 ^ d2) & 0xabcd) == 0 || d1 != d2) > > return 0; > > return 1; > > } > > > > -int cmp2(int d1, int d2) { > > +int cmp2_or(int d1, int d2) { > > if (d1 != d2 || ((d1 ^ d2) & 0xabcd) == 0) > > return 0; > > return 1; > > } > > > > -int cmp3(int d1, int d2) { > > +int cmp3_or(int d1, int d2) { > > if (10 > (0xabcd & (d2 ^ d1)) || d2 != d1) > > return 0; > > return 1; > > } > > > > -int cmp4(int d1, int d2) { > > +int cmp4_or(int d1, int d2) { > > if (d2 != d1 || 10 > (0xabcd & (d2 ^ d1))) > > return 0; > > return 1; > > } > > > > -int cmp1_64(uint64_t d1, uint64_t d2) { > > +int cmp1_and(int d1, int d2) { > > + if (!(((d1 ^ d2) & 0xabcd) == 0) && d1 == d2) > > + return 0; > > + return 1; > > +} > > + > > +int cmp2_and(int d1, int d2) { > > + if (d1 == d2 && !(((d1 ^ d2) & 0xabcd) == 0)) > > + return 0; > > + return 1; > > +} > > + > > +int cmp1_or_64(uint64_t d1, uint64_t d2) { > > if (((d1 ^ d2) & 0xabcd) == 0 || d1 != d2) > > return 0; > > return 1; > > } > > > > -int cmp2_64(uint64_t d1, uint64_t d2) { > > +int cmp2_or_64(uint64_t d1, uint64_t d2) { > > if (d1 != d2 || ((d1 ^ d2) & 0xabcd) == 0) > > return 0; > > return 1; > > } > > > > -int cmp3_64(uint64_t d1, uint64_t d2) { > > +int cmp3_or_64(uint64_t d1, uint64_t d2) { > > if (10 > (0xabcd & (d2 ^ d1)) || d2 != d1) > > return 0; > > return 1; > > } > > > > -int cmp4_64(uint64_t d1, uint64_t d2) { > > +int cmp4_or_64(uint64_t d1, uint64_t d2) { > > if (d2 != d1 || 10 > (0xabcd & (d2 ^ d1))) > > return 0; > > return 1; > > } > > > > +int cmp1_and_64(uint64_t d1, uint64_t d2) { > > + if (!(((d1 ^ d2) & 0xabcd) == 0) && d1 == d2) > > + return 0; > > + return 1; > > +} > > + > > +int cmp2_and_64(uint64_t d1, uint64_t d2) { > > + if (d1 == d2 && !(((d1 ^ d2) & 0xabcd) == 0)) > > + return 0; > > + return 1; > > +} > > + > > /* The if should be removed, so the condition should not exist */ > > /* { dg-final { scan-tree-dump-not "d1_\[0-9\]+.D. \\^ d2_\[0-9\]+.D." > > "optimized" } } */ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c > > b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c > > new file mode 100644 > > index 00000000000..232adbd48d8 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c > > @@ -0,0 +1,55 @@ > > +/* { dg-do compile { target { aarch64-*-* x86_64-*-* } } } */ > > +/* { dg-options "-O3 -fdump-tree-optimized" } */ > > + > > +typedef unsigned long int uint64_t; > > + > > +int cmp1_or_inter(int d1, int d2, int d3) { > > + if (((d1 ^ d2) & 0xabcd) == 0 || d3 != 10 || d1 != d2) > > + return 0; > > + return 1; > > +} > > + > > +int cmp2_or_inter(int d1, int d2, int d3, int d4) { > > + if (((d1 ^ d2) & 0xabcd) == 0 || d3 != 10 || d1 != d2 || d4 == 11) > > + return 0; > > + return 1; > > +} > > + > > +int cmp1_and_inter(int d1, int d2, int d3) { > > + if (!(((d1 ^ d2) & 0xabcd) == 0) && d3 == 10 && d1 == d2) > > + return 0; > > + return 1; > > +} > > + > > +int cmp2_and_inter(int d1, int d2, int d3, int d4) { > > + if (!(((d1 ^ d2) & 0xabcd) == 0) && d3 == 10 && d1 == d2 && d4 != 11) > > + return 0; > > + return 1; > > +} > > + > > +int cmp1_or_inter_64(uint64_t d1, uint64_t d2, uint64_t d3) { > > + if (((d1 ^ d2) & 0xabcd) == 0 || d3 != 10 || d1 != d2) > > + return 0; > > + return 1; > > +} > > + > > +int cmp2_or_inter_64(uint64_t d1, uint64_t d2, uint64_t d3, uint64_t d4) { > > + if (((d1 ^ d2) & 0xabcd) == 0 || d3 != 10 || d1 != d2 || d4 == 11) > > + return 0; > > + return 1; > > +} > > + > > +int cmp1_and_inter_64(uint64_t d1, uint64_t d2, uint64_t d3) { > > + if (!(((d1 ^ d2) & 0xabcd) == 0) && d3 == 10 && d1 == d2) > > + return 0; > > + return 1; > > +} > > + > > +int cmp2_and_inter_64(uint64_t d1, uint64_t d2, uint64_t d3, uint64_t d4) { > > + if (!(((d1 ^ d2) & 0xabcd) == 0) && d3 == 10 && d1 == d2 && d4 != 11) > > + return 0; > > + return 1; > > +} > > + > > +/* The if should be removed, so the condition should not exist */ > > +/* { dg-final { scan-tree-dump-not "d1_\[0-9\]+.D. \\^ d2_\[0-9\]+.D." > > "optimized" } } */ > > \ No newline at end of file > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c > > b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c > > index 51b7373af0d..5509cc67577 100644 > > --- a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c > > @@ -1,55 +1,79 @@ > > /* { dg-do compile } */ > > -/* { dg-options "-O3 -fdump-tree-optimized --param > > logical-op-non-short-circuit=1" } */ > > +/* { dg-options "-O3 -fdump-tree-optimized" } */ > > > > typedef unsigned long int uint64_t; > > > > -int cmp1(int d1, int d2) { > > +int cmp1_or(int d1, int d2) { > > if ((d1 ^ d2) == 0xabcd || d1 != d2) > > return 0; > > return 1; > > } > > > > -int cmp2(int d1, int d2) { > > +int cmp2_or(int d1, int d2) { > > if (d1 != d2 || (d1 ^ d2) == 0xabcd) > > return 0; > > return 1; > > } > > > > -int cmp3(int d1, int d2) { > > +int cmp3_or(int d1, int d2) { > > if (0xabcd > (d2 ^ d1) || d2 != d1) > > return 0; > > return 1; > > } > > > > -int cmp4(int d1, int d2) { > > +int cmp4_or(int d1, int d2) { > > if (d2 != d1 || 0xabcd > (d2 ^ d1)) > > return 0; > > return 1; > > } > > > > -int cmp1_64(uint64_t d1, uint64_t d2) { > > +int cmp1_and(int d1, int d2) { > > + if (!((d1 ^ d2) == 0xabcd) && d1 == d2) > > + return 0; > > + return 1; > > +} > > + > > +int cmp2_and(int d1, int d2) { > > + if (d1 == d2 && !((d1 ^ d2) == 0xabcd)) > > + return 0; > > + return 1; > > +} > > + > > +int cmp1_64_or(uint64_t d1, uint64_t d2) { > > if ((d1 ^ d2) == 0xabcd || d1 != d2) > > return 0; > > return 1; > > } > > > > -int cmp2_64(uint64_t d1, uint64_t d2) { > > +int cmp2_64_or(uint64_t d1, uint64_t d2) { > > if (d1 != d2 || (d1 ^ d2) == 0xabcd) > > return 0; > > return 1; > > } > > > > -int cmp3_64(uint64_t d1, uint64_t d2) { > > +int cmp3_64_or(uint64_t d1, uint64_t d2) { > > if (0xabcd > (d2 ^ d1) || d2 != d1) > > return 0; > > return 1; > > } > > > > -int cmp4_64(uint64_t d1, uint64_t d2) { > > +int cmp4_64_or(uint64_t d1, uint64_t d2) { > > if (d2 != d1 || 0xabcd > (d2 ^ d1)) > > return 0; > > return 1; > > } > > > > +int cmp1_64_and(uint64_t d1, uint64_t d2) { > > + if (!((d1 ^ d2) == 0xabcd) && d1 == d2) > > + return 0; > > + return 1; > > +} > > + > > +int cmp2_64_and(uint64_t d1, uint64_t d2) { > > + if (d1 == d2 && !((d1 ^ d2) == 0xabcd)) > > + return 0; > > + return 1; > > +} > > + > > /* The if should be removed, so the condition should not exist */ > > /* { dg-final { scan-tree-dump-not "d1_\[0-9\]+.D. \\^ d2_\[0-9\]+.D." > > "optimized" } } */ > > diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc > > index 4017eea3413..3c15cc44560 100644 > > --- a/gcc/tree-ssa-reassoc.cc > > +++ b/gcc/tree-ssa-reassoc.cc > > @@ -19,6 +19,8 @@ along with GCC; see the file COPYING3. If not see > > <http://www.gnu.org/licenses/>. */ > > > > #include "config.h" > > +#define INCLUDE_SET > > +#define INCLUDE_ALGORITHM /* std::set_intersection. */ > > #include "system.h" > > #include "coretypes.h" > > #include "backend.h" > > @@ -4077,6 +4079,347 @@ optimize_range_tests_var_bound (enum tree_code > > opcode, int first, int length, > > return any_changes; > > } > > > > +/* Helper function for optimize_cmp_xor_exprs. Visit EXPR operands > > + recursively and try to find comparison or XOR expressions that can be > > + solved using the expressions in CALC_STMTS. Expressions that can be > > folded > > + to 0 are stored in STMTS_TO_FOLD. IS_OR_EXPR is true for OR expressions > > + and false for AND expressions. */ > > + > > +tree > > +solve_expr (tree expr, std::set<gimple *> *calc_stmts, > > + std::set<gimple *> *stmts_to_fold, std::set<tree> *visited, > > + bool is_or_expr) > > +{ > > + /* Return, if have already visited this expression or the expression is > > not > > + an SSA name. */ > > + if ((visited->find (expr) != visited->end ()) > > + || (TREE_CODE (expr) != SSA_NAME)) > > + return NULL_TREE; > > + > > + visited->insert (expr); > > + > > + gimple *def_stmt = SSA_NAME_DEF_STMT (expr); > > + > > + if (!def_stmt || !is_gimple_assign (def_stmt)) > > + return expr; > > + > > + unsigned int op_num = gimple_num_ops (def_stmt); > > + unsigned int terminal_node_num = 0; > > + /* Visit the expression operands recursively until finding a statement > > that > > + all of its operands are terminal nodes. */ > > + for (unsigned i = 1; i < op_num; ++i) > > + { > > + tree op = gimple_op (def_stmt, i); > > + if (!op) continue; > > + tree solve_result = solve_expr (op, calc_stmts, stmts_to_fold, > > visited, > > + is_or_expr); > > + if (solve_result == op) > > + terminal_node_num++; > > + } > > + > > + /* Check if all of the operands are terminal nodes. */ > > + if (terminal_node_num != (op_num - 1)) > > + return NULL_TREE; > > + > > + tree_code def_code = gimple_assign_rhs_code (def_stmt); > > + /* XOR and NE expressions are handled in a similar manner. */ > > + if (def_code == BIT_XOR_EXPR) > > + def_code = NE_EXPR; > > + > > + tree def_lhs = gimple_assign_lhs (def_stmt); > > + tree def_op1 = gimple_assign_rhs1 (def_stmt); > > + tree def_op2 = gimple_assign_rhs2 (def_stmt); > > + tree def_op3 = gimple_assign_rhs3 (def_stmt); > > + > > + /* Find possible statements in calc_stmts that can solve the current > > + expression. We are looking for statements with the same operation and > > + the same operands as the current one in case of an OR expression, or > > + a statement using the inverse operation of the current one, with the > > same > > + operands, in case of an AND expression. */ > > + for (gimple *stmt : *calc_stmts) > > + { > > + tree_code stmt_rhs_code = gimple_assign_rhs_code (stmt); > > + tree_code inverted_code = invert_tree_comparison (stmt_rhs_code, > > + HONOR_NANS (TREE_TYPE > > (expr))); > > + if (((is_or_expr && def_code == stmt_rhs_code) > > + || (!is_or_expr && def_code == inverted_code)) > > + && gimple_assign_lhs (stmt) != def_lhs > > + && gimple_assign_rhs1 (stmt) == def_op1 > > + && gimple_assign_rhs2 (stmt) == def_op2 > > + && gimple_assign_rhs3 (stmt) == def_op3) > > + { > > + /* In case of an AND expression, where the related terms are in > > + different blocks, fold the term that is dominated by the > > + other. This ensures the correct handling of cases where > > + a related term may not be part of the AND expression, but > > + only happens to be inside the `if` statement's block. */ > > + if (is_or_expr > > + || (gimple_bb (stmt) == gimple_bb (def_stmt)) > > + || reassoc_stmt_dominates_stmt_p (stmt, def_stmt)) > > + { > > + stmts_to_fold->insert (def_stmt); > > + calc_stmts->erase (def_stmt); > > + } > > + else if (reassoc_stmt_dominates_stmt_p (def_stmt, stmt)) > > + { > > + stmts_to_fold->insert (stmt); > > + calc_stmts->erase (stmt); > > + } > > + > > + return expr; > > + } > > + } > > + > > + return NULL_TREE; > > +} > > + > > +/* Helper function for optimize_cmp_xor_exprs. Unfold EXPR and get the > > + terminal nodes in which it is analyzed. */ > > + > > +void > > +find_terminal_nodes (tree expr, std::set<tree> *terminal_nodes, > > + std::set<tree> *visited) > > Instead of std::set<tree> you most likely want to use hash_set<tree> instead. > > > +{ > > + if (visited->find (expr) != visited->end ()) > > + return; > > + > > + visited->insert (expr); > > + > > + if (TREE_CODE (expr) != SSA_NAME) > > + { > > + terminal_nodes->insert (expr); > > + return; > > + } > > + > > + gimple *def_stmt = SSA_NAME_DEF_STMT (expr); > > + > > + if (is_gimple_debug (def_stmt)) > > + return; > > + > > + if (!def_stmt || !is_gimple_assign (def_stmt)) > > + { > > + terminal_nodes->insert (expr); > > + return; > > + } > > + > > + /* Visit the expression operands recursively. */ > > + unsigned int op_num = gimple_num_ops (def_stmt); > > + for (unsigned i = 1; i < op_num; ++i) > > + { > > + tree op = gimple_op (def_stmt, i); > > + if (!op) continue; > > + find_terminal_nodes (op, terminal_nodes, visited); > > + } > > +} > > + > > +/* Helper function for optimize_cmp_xor_exprs. Check if the terminal nodes > > + for EXPR have been calculated before, otherwise find them. */ > > + > > +std::set<tree> > > +get_terminal_nodes (tree expr, auto_vec<std::set<tree>> *terminal_nodes, > > + unsigned int index) > > +{ > > + if (terminal_nodes->length () > index) > > + return (*terminal_nodes)[index]; > > + else > > + { > > + std::set<tree> visited; > > + std::set<tree> expr_terminal_nodes; > > + > > + find_terminal_nodes (expr, &expr_terminal_nodes, &visited); > > + terminal_nodes->safe_push (expr_terminal_nodes); > > + return expr_terminal_nodes; > > + } > > +} > > + > > +/* Optimize boolean expressions containing comparisons or xor expressions > > and > > + the value of one term in the expression implies the value of another, > > like > > + the following: > > + ((d1 ^ d2) & 0xabcd) == 0 | d1 != d2 --> (0 & 0xabcd) == 0 | d1 != d2, > > + which will later be simplified to true. > > + (d1 ^ d2) == 0xabcd | d1 != d2 --> 0 == 0xabcd | d1 != d2, > > + which will later be simplified to d1 != d2. > > + ((d1 ^ d2) & 0xabcd) == 0 | d3 != 10 | d1 != d2 --> > > + (0 & 0xabcd) == 0 | d3 != 10 | d1 != d2, > > + which will later be simplified to true. */ > > + > > +static bool > > +optimize_cmp_xor_exprs (tree_code opcode, vec<operand_entry *> *ops) > > +{ > > + std::set<std::set<tree>> op_subexprsets; > > + std::set<tree> terms_in_preds; > > + bool is_or_expr = opcode == BIT_IOR_EXPR; > > + bool any_changes = false; > > + > > + if (!is_or_expr && (opcode != BIT_AND_EXPR)) > > + return false; > > + > > + /* Iterate over the operands in the AND/OR expression and keep those that > > + are SSA names. */ > > + std::set<tree> expr_terms; > > + for (operand_entry *oe : ops) > > + { > > + tree op = oe->op; > > + if (TREE_CODE (op) == SSA_NAME) > > + expr_terms.insert (op); > > + } > > + > > + /* Find related terms to the AND/OR expression in the current block's > > + predecessors. */ > > + if (expr_terms.size () > 0) > > + { > > + edge e; > > + edge_iterator ei; > > + tree op = *(expr_terms.begin ()); > > + gimple *op_def = SSA_NAME_DEF_STMT (op); > > + basic_block curr_bb; > > + > > + if (op_def && (curr_bb = gimple_bb (op_def))) > > + { > > + FOR_EACH_EDGE (e, ei, curr_bb->preds) > > + { > > + basic_block pred = e->src; > > + gimple_stmt_iterator gsi = gsi_last_bb (pred); > > + gimple *last_stmt = gsi_stmt (gsi); > > + > > + if (!last_stmt || (gimple_code (last_stmt) != GIMPLE_COND)) > > + continue; > > + > > + tree_code cond_code = gimple_cond_code (last_stmt); > > + tree cond_lhs = gimple_cond_lhs (last_stmt); > > + > > + if ((cond_code == EQ_EXPR || cond_code == NE_EXPR) > > + && TREE_CODE (cond_lhs) == SSA_NAME > > + && (integer_zerop (gimple_cond_rhs (last_stmt))) > > + && (EDGE_COUNT (pred->succs) > 1)) > > + { > > + edge t = EDGE_SUCC (pred, 0); > > + edge e = EDGE_SUCC (pred, 1); > > + > > + if (!(t->flags & EDGE_TRUE_VALUE)) > > + std::swap (t, e); > > + > > + if ((is_or_expr && e->dest == curr_bb) > > + || (!is_or_expr && t->dest == curr_bb)) > > + terms_in_preds.insert (cond_lhs); > > + } > > + } > > + } > > + } > > + > > + expr_terms.insert (terms_in_preds.begin (), terms_in_preds.end ()); > > + > > + /* Initialize sets of related expressions. */ > > + auto_vec<std::set<tree>> terminal_nodes; > > + unsigned int i = 0; > > + for (std::set<tree>::iterator it = expr_terms.begin (); > > + it != expr_terms.end (); ++it, ++i) > > + { > > + tree op = *it; > > + std::set<tree> related_terms = {op}; > > + > > + std::set<tree> op_terminal_nodes = get_terminal_nodes (op, > > + &terminal_nodes, i); > > + > > + unsigned int j = i + 1; > > + for (std::set<tree>::iterator next_it = std::next (it); > > + next_it != expr_terms.end (); ++next_it, ++j) > > + { > > + tree next_op = *next_it; > > + std::set<tree> next_op_term_nodes = get_terminal_nodes (next_op, > > + &terminal_nodes, j); > > + std::set<tree> intersection; > > + std::set_intersection (op_terminal_nodes.begin (), > > + op_terminal_nodes.end (), > > + next_op_term_nodes.begin (), > > + next_op_term_nodes.end (), > > + std::inserter (intersection, > > + intersection.begin ())); > > With hash_set, you can't use set_intersection since hash tables > iterators are not sorted. But you do your own intersection here. > > Thanks, > Andrew Pinski > > > + if (intersection.size () > 1) > > + related_terms.insert (next_op); > > + } > > + > > + op_subexprsets.insert (related_terms); > > + } > > + > > + /* Iterate over op_subexprsets, analyzing and trying to fold the > > expressions > > + in each set of related expressions until reaching a fixed-point. */ > > + > > + auto_vec<tree> solved_exprs; > > + for (std::set<tree> expr_set : op_subexprsets) > > + { > > + > > + if (expr_set.size () < 2) continue; > > + > > + std::set<gimple *> calc_stmts; > > + std::set<gimple *> stmts_to_fold; > > + bool any_change; > > + > > + do { > > + any_change = false; > > + for (tree subexpr : expr_set) > > + { > > + gimple *def_stmt = SSA_NAME_DEF_STMT (subexpr); > > + if (!is_gimple_assign (def_stmt)) > > + continue; > > + > > + /* If the expression's def is an EQ or NE expression, store it > > in > > + calc_stmts in order to use it to solve more complex > > + expressions. */ > > + tree_code def_stmt_code = gimple_assign_rhs_code (def_stmt); > > + if ((def_stmt_code == EQ_EXPR || def_stmt_code == NE_EXPR) > > + && (calc_stmts.find (def_stmt) == calc_stmts.end ()) > > + && (stmts_to_fold.find (def_stmt) == stmts_to_fold.end ())) > > + { > > + calc_stmts.insert (def_stmt); > > + any_change = true; > > + } > > + else > > + { > > + std::set<tree> visited; > > + if (solve_expr (subexpr, &calc_stmts, &stmts_to_fold, > > + &visited, is_or_expr)) > > + solved_exprs.safe_push (subexpr); > > + } > > + } > > + } while (any_change); > > + > > + for (gimple *stmt : stmts_to_fold) > > + { > > + tree stmt_lhs = gimple_assign_lhs (stmt); > > + if (dump_file && (dump_flags & TDF_DETAILS)) > > + { > > + fprintf (dump_file, "Folding "); > > + print_generic_expr (dump_file, stmt_lhs); > > + fprintf (dump_file, " to 0\n"); > > + } > > + > > + operand_entry *oe; > > + unsigned int i; > > + tree zero = build_zero_cst (TREE_TYPE (stmt_lhs)); > > + FOR_EACH_VEC_ELT (*ops, i, oe) > > + { > > + if (oe->op == stmt_lhs) > > + { > > + oe->op = zero; > > + reassociate_stats.ops_eliminated += ops->length () - 1; > > + ops->truncate (0); > > + ops->quick_push (oe); > > + } > > + } > > + > > + gimple_stmt_iterator stmt_gsi = gsi_for_stmt (stmt); > > + gsi_remove (&stmt_gsi, true); > > + replace_uses_by (stmt_lhs, zero); > > + release_defs (stmt); > > + > > + any_changes = true; > > + } > > + } > > + > > + return any_changes; > > +} > > + > > /* Optimize range tests, similarly how fold_range_test optimizes > > it on trees. The tree code for the binary > > operation between all the operands is OPCODE. > > @@ -4170,6 +4513,7 @@ optimize_range_tests (enum tree_code opcode, > > ranges, first_bb); > > any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length, > > ops, ranges); > > + any_changes |= optimize_cmp_xor_exprs (opcode, ops); > > > > if (any_changes && opcode != ERROR_MARK) > > { > > -- > > 2.47.0 > >