This patch gives up on using the reassociation rank algorithm to correctly place __builtin_powi calls and their feeding multiplies. In the end this proved to introduce more complexity than it saved, due in part to the poor fit of introducing DAG expressions into the reassociated operand tree. This patch returns to generating explicit multiplies to bind the builtin calls together and to the results of the expression tree rewrite. I feel this version is smaller, easier to understand, and less fragile than the existing code.
Bootstrapped and tested on powerpc64-unknown-linux-gnu with no new regressions. Ok for trunk? Thanks, Bill 2012-05-17 Bill Schmidt <wschm...@linux.vnet.ibm.com> * tree-ssa-reassoc.c (bip_map): Remove decl. (completely_remove_stmt): Remove function. (remove_def_if_absorbed_call): Remove function. (remove_visited_stmt_chain): Remove __builtin_powi handling. (possibly_move_powi): Remove function. (rewrite_expr_tree): Remove calls to possibly_move_powi. (rewrite_expr_tree_parallel): Likewise. (attempt_builtin_powi): Build multiplies explicitly rather than relying on the ops vector and rank system. (transform_stmt_to_copy): New function. (transform_stmt_to_multiply): Likewise. (reassociate_bb): Handle leftover operations after __builtin_powi optimization; build a final multiply if necessary. Index: gcc/tree-ssa-reassoc.c =================================================================== --- gcc/tree-ssa-reassoc.c (revision 187626) +++ gcc/tree-ssa-reassoc.c (working copy) @@ -200,10 +200,6 @@ static long *bb_rank; /* Operand->rank hashtable. */ static struct pointer_map_t *operand_rank; -/* Map from inserted __builtin_powi calls to multiply chains that - feed them. */ -static struct pointer_map_t *bip_map; - /* Forward decls. */ static long get_rank (tree); @@ -2184,32 +2180,6 @@ is_phi_for_stmt (gimple stmt, tree operand) return false; } -/* Remove STMT, unlink its virtual defs, and release its SSA defs. */ - -static inline void -completely_remove_stmt (gimple stmt) -{ - gimple_stmt_iterator gsi = gsi_for_stmt (stmt); - gsi_remove (&gsi, true); - unlink_stmt_vdef (stmt); - release_defs (stmt); -} - -/* If OP is defined by a builtin call that has been absorbed by - reassociation, remove its defining statement completely. */ - -static inline void -remove_def_if_absorbed_call (tree op) -{ - gimple stmt; - - if (TREE_CODE (op) == SSA_NAME - && has_zero_uses (op) - && is_gimple_call ((stmt = SSA_NAME_DEF_STMT (op))) - && gimple_visited_p (stmt)) - completely_remove_stmt (stmt); -} - /* Remove def stmt of VAR if VAR has zero uses and recurse on rhs1 operand if so. */ @@ -2218,7 +2188,6 @@ remove_visited_stmt_chain (tree var) { gimple stmt; gimple_stmt_iterator gsi; - tree var2; while (1) { @@ -2228,95 +2197,15 @@ remove_visited_stmt_chain (tree var) if (is_gimple_assign (stmt) && gimple_visited_p (stmt)) { var = gimple_assign_rhs1 (stmt); - var2 = gimple_assign_rhs2 (stmt); gsi = gsi_for_stmt (stmt); gsi_remove (&gsi, true); release_defs (stmt); - /* A multiply whose operands are both fed by builtin pow/powi - calls must check whether to remove rhs2 as well. */ - remove_def_if_absorbed_call (var2); } - else if (is_gimple_call (stmt) && gimple_visited_p (stmt)) - { - completely_remove_stmt (stmt); - return; - } else return; } } -/* If OP is an SSA name, find its definition and determine whether it - is a call to __builtin_powi. If so, move the definition prior to - STMT. Only do this during early reassociation. */ - -static void -possibly_move_powi (gimple stmt, tree op) -{ - gimple stmt2, *mpy; - tree fndecl; - gimple_stmt_iterator gsi1, gsi2; - - if (!first_pass_instance - || !flag_unsafe_math_optimizations - || TREE_CODE (op) != SSA_NAME) - return; - - stmt2 = SSA_NAME_DEF_STMT (op); - - if (!is_gimple_call (stmt2) - || !has_single_use (gimple_call_lhs (stmt2))) - return; - - fndecl = gimple_call_fndecl (stmt2); - - if (!fndecl - || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) - return; - - switch (DECL_FUNCTION_CODE (fndecl)) - { - CASE_FLT_FN (BUILT_IN_POWI): - break; - default: - return; - } - - /* Move the __builtin_powi. */ - gsi1 = gsi_for_stmt (stmt); - gsi2 = gsi_for_stmt (stmt2); - gsi_move_before (&gsi2, &gsi1); - - /* See if there are multiplies feeding the __builtin_powi base - argument that must also be moved. */ - while ((mpy = (gimple *) pointer_map_contains (bip_map, stmt2)) != NULL) - { - /* If we've already moved this statement, we're done. This is - identified by a NULL entry for the statement in bip_map. */ - gimple *next = (gimple *) pointer_map_contains (bip_map, *mpy); - if (next && !*next) - return; - - stmt = stmt2; - stmt2 = *mpy; - gsi1 = gsi_for_stmt (stmt); - gsi2 = gsi_for_stmt (stmt2); - gsi_move_before (&gsi2, &gsi1); - - /* The moved multiply may be DAG'd from multiple calls if it - was the result of a cached multiply. Only move it once. - Rank order ensures we move it to the right place the first - time. */ - if (next) - *next = NULL; - else - { - next = (gimple *) pointer_map_insert (bip_map, *mpy); - *next = NULL; - } - } -} - /* This function checks three consequtive operands in passed operands vector OPS starting from OPINDEX and swaps two operands if it is profitable for binary operation @@ -2421,9 +2310,6 @@ rewrite_expr_tree (gimple stmt, unsigned int opind fprintf (dump_file, " into "); print_gimple_stmt (dump_file, stmt, 0, 0); } - - possibly_move_powi (stmt, oe1->op); - possibly_move_powi (stmt, oe2->op); } return; } @@ -2469,8 +2355,6 @@ rewrite_expr_tree (gimple stmt, unsigned int opind fprintf (dump_file, " into "); print_gimple_stmt (dump_file, stmt, 0, 0); } - - possibly_move_powi (stmt, oe->op); } /* Recurse on the LHS of the binary operator, which is guaranteed to be the non-leaf side. */ @@ -2644,9 +2528,6 @@ rewrite_expr_tree_parallel (gimple stmt, int width fprintf (dump_file, " into "); print_gimple_stmt (dump_file, stmts[i], 0, 0); } - - possibly_move_powi (stmts[i], op1); - possibly_move_powi (stmts[i], op2); } remove_visited_stmt_chain (last_rhs1); @@ -3222,11 +3103,10 @@ get_reassoc_pow_ssa_name (tree *target, tree type) /* Look for repeated operands in OPS in the multiply tree rooted at STMT. Replace them with an optimal sequence of multiplies and powi - builtin calls, and remove the used operands from OPS. Push new - SSA names onto OPS that represent the introduced multiplies and - builtin calls. */ + builtin calls, and remove the used operands from OPS. Return an + SSA name representing the value of the replacement sequence. */ -static void +static tree attempt_builtin_powi (gimple stmt, VEC(operand_entry_t, heap) **ops, tree *target) { @@ -3235,6 +3115,7 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent operand_entry_t oe; repeat_factor_t rf1, rf2; repeat_factor rfnew; + tree result = NULL_TREE; tree target_ssa, iter_result; tree type = TREE_TYPE (gimple_get_lhs (stmt)); tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI); @@ -3244,7 +3125,7 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and target. */ if (!powi_fndecl) - return; + return NULL_TREE; /* Allocate the repeated factor vector. */ repeat_factor_vec = VEC_alloc (repeat_factor, heap, 10); @@ -3315,7 +3196,6 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent while (true) { HOST_WIDE_INT power; - gimple last_mul = NULL; /* First look for the largest cached product of factors from preceding iterations. If found, create a builtin_powi for @@ -3353,25 +3233,14 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent } else { - gimple *value; - iter_result = get_reassoc_pow_ssa_name (target, type); pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, build_int_cst (integer_type_node, power)); gimple_call_set_lhs (pow_stmt, iter_result); gimple_set_location (pow_stmt, gimple_location (stmt)); - /* Temporarily place the call; we will move it and its - feeding multiplies to the correct place during - rewrite_expr. */ gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); - if (!operand_equal_p (rf1->repr, rf1->factor, 0)) - { - value = (gimple *) pointer_map_insert (bip_map, pow_stmt); - *value = SSA_NAME_DEF_STMT (rf1->repr); - } - if (dump_file && (dump_flags & TDF_DETAILS)) { unsigned elt; @@ -3457,15 +3326,6 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); rf1->repr = target_ssa; - /* Chain multiplies together for later movement. */ - if (last_mul) - { - gimple *value - = (gimple *) pointer_map_insert (bip_map, mul_stmt); - *value = last_mul; - } - last_mul = mul_stmt; - /* Don't reprocess the multiply we just introduced. */ gimple_set_visited (mul_stmt, true); } @@ -3481,20 +3341,23 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent gimple_call_set_lhs (pow_stmt, iter_result); gimple_set_location (pow_stmt, gimple_location (stmt)); gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); + } - /* If we inserted a chain of multiplies before the pow_stmt, - record that fact so we can move it later when we move the - pow_stmt. */ - if (last_mul) - { - gimple *value = (gimple *) pointer_map_insert (bip_map, pow_stmt); - *value = last_mul; - } + /* If we previously formed at least one other builtin_powi call, + form the product of this one and those others. */ + if (result) + { + tree new_result = get_reassoc_pow_ssa_name (target, type); + mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result, + result, iter_result); + gimple_set_location (mul_stmt, gimple_location (stmt)); + gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); + gimple_set_visited (mul_stmt, true); + result = new_result; } + else + result = iter_result; - /* Append the result of this iteration to the ops vector. */ - add_to_ops_vec (ops, iter_result); - /* Decrement the occurrence count of each element in the product by the count found above, and remove this many copies of each factor from OPS. */ @@ -3534,8 +3397,61 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent clean up. */ VEC_qsort (operand_entry_t, *ops, sort_by_operand_rank); VEC_free (repeat_factor, heap, repeat_factor_vec); + + /* Return the final product computed herein. Note that there may + still be some elements with single occurrence count left in OPS; + those will be handled by the normal reassociation logic. */ + return result; } +/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */ + +static void +transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs) +{ + tree rhs1; + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Transforming "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } + + rhs1 = gimple_assign_rhs1 (stmt); + gimple_assign_set_rhs_from_tree (gsi, new_rhs); + update_stmt (stmt); + remove_visited_stmt_chain (rhs1); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " into "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } +} + +/* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */ + +static void +transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt, + tree rhs1, tree rhs2) +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Transforming "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } + + gimple_assign_set_rhs_with_ops_1 (gsi, MULT_EXPR, rhs1, rhs2, NULL_TREE); + update_stmt (gsi_stmt (*gsi)); + remove_visited_stmt_chain (rhs1); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " into "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } +} + /* Reassociate expressions in basic block BB and its post-dominator as children. */ @@ -3606,7 +3522,7 @@ reassociate_bb (basic_block bb) if (associative_tree_code (rhs_code)) { VEC(operand_entry_t, heap) *ops = NULL; - bip_map = pointer_map_create (); + tree powi_result = NULL_TREE; /* There may be no immediate uses left by the time we get here because we may have eliminated them all. */ @@ -3630,28 +3546,21 @@ reassociate_bb (basic_block bb) if (first_pass_instance && rhs_code == MULT_EXPR && flag_unsafe_math_optimizations) - attempt_builtin_powi (stmt, &ops, &target); + powi_result = attempt_builtin_powi (stmt, &ops, &target); - if (VEC_length (operand_entry_t, ops) == 1) + /* If the operand vector is now empty, all operands were + consumed by the __builtin_powi optimization. */ + if (VEC_length (operand_entry_t, ops) == 0) + transform_stmt_to_copy (&gsi, stmt, powi_result); + else if (VEC_length (operand_entry_t, ops) == 1) { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Transforming "); - print_gimple_stmt (dump_file, stmt, 0, 0); - } - - rhs1 = gimple_assign_rhs1 (stmt); - gimple_assign_set_rhs_from_tree (&gsi, - VEC_last (operand_entry_t, - ops)->op); - update_stmt (stmt); - remove_visited_stmt_chain (rhs1); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, " into "); - print_gimple_stmt (dump_file, stmt, 0, 0); - } + tree last_op = VEC_last (operand_entry_t, ops)->op; + + if (powi_result) + transform_stmt_to_multiply (&gsi, stmt, last_op, + powi_result); + else + transform_stmt_to_copy (&gsi, stmt, last_op); } else { @@ -3668,10 +3577,27 @@ reassociate_bb (basic_block bb) rewrite_expr_tree_parallel (stmt, width, ops); else rewrite_expr_tree (stmt, 0, ops, false); + + /* If we combined some repeated factors into a + __builtin_powi call, multiply that result by the + reassociated operands. */ + if (powi_result) + { + gimple mul_stmt; + tree type = TREE_TYPE (gimple_get_lhs (stmt)); + tree target_ssa = get_reassoc_pow_ssa_name (&target, + type); + gimple_set_lhs (stmt, target_ssa); + update_stmt (stmt); + mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs, + powi_result, + target_ssa); + gimple_set_location (mul_stmt, gimple_location (stmt)); + gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT); + } } VEC_free (operand_entry_t, heap, ops); - pointer_map_destroy (bip_map); } } }