Hi, I have attached revamped version of Kugan's patch (https://gcc.gnu.org/ml/gcc/2013-06/msg00100.html) for PR43721. Test-case: http://pastebin.com/QMfpXLD9 divmod pass dump: http://pastebin.com/yMY1ikCp Assembly: http://pastebin.com/kk2HZpvA
The approach I took is similar to sincos pass, which involves two parts: a) divmod pass that transforms: t1 = a / b; t2 = a % b; to the following sequence: complex_tmp = DIVMOD (a, b); t1 = REALPART_EXPR(a); t2 = IMAGPART_EXPR(b); b) DIVMOD(a, b) is represented as an internal-fn and is expanded by expand_DIVMOD(). I am not sure if I have done this correctly. I was somehow looking to reuse expand_divmod() but am not able to figure out how to do that (AFAIU expand_divmod() strictly returns either the quotient or remainder but never both which is what I want), so ended up with manually expanding to rtx. While going through the sincos pass in execute_cse_sincos_1, I didn't understand the following: if (gimple_purge_dead_eh_edges (gimple_bb (stmt))) cfg_changed = true; Um why is the call to gimple_purge_dead_eh_edges necessary here? A silly question, what options to pass to gcc to print statistics ? I added statistics_counter_event to keep track of number of calls inserted/used but not sure how to print them :/ I would be grateful for suggestions for improving the patch. Thank you, Prathamesh
2015-10-30 Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> Kugan Vivekanandarajah <kugan.vivekanandara...@linaro.org> * internal-fn.c (expand_DIVMOD): New internal function. * internal-fn.def: Add entry for expand_DIVMOD. * passes.def (pass_divmod): New pass. * tree-pass.h (make_pass_divmod): Declare. * tree-ssa-math-opts.c: Implement pass_divmod.
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index f12d3af..5fd95f2 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -1958,6 +1958,72 @@ expand_VA_ARG (gcall *stmt ATTRIBUTE_UNUSED) gcc_unreachable (); } +static void +expand_DIVMOD (gcall *stmt) +{ + tree lhs = gimple_call_lhs (stmt); + tree arg0 = gimple_call_arg (stmt, 0); + tree arg1 = gimple_call_arg (stmt, 1); + + gcc_assert (TREE_CODE (TREE_TYPE (lhs)) == COMPLEX_TYPE); + tree type = TREE_TYPE (TREE_TYPE (lhs)); + machine_mode mode = TYPE_MODE (type); + optab tab = (TYPE_UNSIGNED (type)) ? udivmod_optab : sdivmod_optab; + + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); + + /* Check if optab handler exists for udivmod/sdivmod. */ + if (optab_handler (tab, mode) != CODE_FOR_nothing) + { + rtx quotient = gen_reg_rtx (mode); + rtx remainder = gen_reg_rtx (mode); + expand_twoval_binop (tab, op0, op1, quotient, remainder, TYPE_UNSIGNED (type)); + + /* Wrap the return value (quotient, remaineder) within COMPLEX_EXPR */ + expand_expr (build2 (COMPLEX_EXPR, TREE_TYPE (lhs), + make_tree (TREE_TYPE (arg0), quotient), + make_tree (TREE_TYPE (arg1), remainder)), + target, VOIDmode, EXPAND_NORMAL); + + return; + } + + rtx libfunc = NULL_RTX; + + machine_mode compute_mode; + for (compute_mode = mode; + compute_mode != VOIDmode; + compute_mode = GET_MODE_WIDER_MODE (compute_mode)) + { + libfunc = optab_libfunc (tab, compute_mode); + if (libfunc != NULL_RTX) + break; + } + + gcc_assert (libfunc != NULL_RTX); + + if (compute_mode != mode) + { + op0 = convert_modes (compute_mode, GET_MODE (op0), op0, tab); + op1 = convert_modes (compute_mode, GET_MODE (op1), op1, tab); + } + + machine_mode libval_mode = smallest_mode_for_size (2 * GET_MODE_BITSIZE (compute_mode), MODE_INT); + rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST, + libval_mode, 2, op0, compute_mode, op1, compute_mode); + + rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0); + rtx remainder = simplify_gen_subreg (mode, libval, libval_mode, GET_MODE_SIZE (compute_mode)); + + /* Wrap the return value (quotient, remaineder) within COMPLEX_EXPR */ + expand_expr (build2 (COMPLEX_EXPR, TREE_TYPE (lhs), + make_tree (TREE_TYPE (arg0), quotient), + make_tree (TREE_TYPE (arg1), remainder)), + target, VOIDmode, EXPAND_NORMAL); +} + /* Routines to expand each internal function, indexed by function number. Each routine has the prototype: diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 305cf1b..1d6fbab 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -65,3 +65,4 @@ DEF_INTERNAL_FN (SUB_OVERFLOW, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (MUL_OVERFLOW, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (TSAN_FUNC_EXIT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (VA_ARG, ECF_NOTHROW | ECF_LEAF, NULL) +DEF_INTERNAL_FN (DIVMOD, ECF_CONST | ECF_LEAF, NULL) diff --git a/gcc/passes.def b/gcc/passes.def index dc3f44c..896623a 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -272,6 +272,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_simduid_cleanup); NEXT_PASS (pass_lower_vector_ssa); NEXT_PASS (pass_cse_reciprocals); + NEXT_PASS (pass_divmod); NEXT_PASS (pass_reassoc); NEXT_PASS (pass_strength_reduction); NEXT_PASS (pass_tracer); diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index c37e4b2..643acb3 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -421,6 +421,7 @@ extern gimple_opt_pass *make_pass_cse_reciprocals (gcc::context *ctxt); extern gimple_opt_pass *make_pass_cse_sincos (gcc::context *ctxt); extern gimple_opt_pass *make_pass_optimize_bswap (gcc::context *ctxt); extern gimple_opt_pass *make_pass_optimize_widening_mul (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_divmod (gcc::context *ctxt); extern gimple_opt_pass *make_pass_warn_function_return (gcc::context *ctxt); extern gimple_opt_pass *make_pass_warn_function_noreturn (gcc::context *ctxt); extern gimple_opt_pass *make_pass_cselim (gcc::context *ctxt); diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 9223642..1b414e4 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -3630,3 +3630,231 @@ make_pass_optimize_widening_mul (gcc::context *ctxt) { return new pass_optimize_widening_mul (ctxt); } + +namespace { + +struct divmod_stats_ +{ + int n_calls; + int n_used; + + divmod_stats_ (): n_calls (0), n_used (0) {} +} divmod_stats; + +const pass_data pass_data_divmod = +{ + GIMPLE_PASS, /* type */ + "divmod", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_update_ssa, /* todo_flags_finish */ +}; + +class pass_divmod : public gimple_opt_pass +{ +public: + pass_divmod (gcc::context *ctxt) + : gimple_opt_pass (pass_data_divmod, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return optimize; + } + + virtual unsigned int execute (function *); + +private: + unsigned execute_divmod_1 (gimple *); + void maybe_record_stmt (vec<gimple *>& stmts, basic_block& top_bb, gimple *use_stmt); + gimple_stmt_iterator get_later_stmt (const basic_block& top_bb, gimple *stmt1, gimple *stmt2); + +}; // class pass_divmod + +void +pass_divmod::maybe_record_stmt (vec<gimple *>& stmts, basic_block& top_bb, gimple *use_stmt) +{ + /* FIXME: Use same condition for adding use_stmt to vector as in sincos ? */ + maybe_record_sincos (&stmts, &top_bb, use_stmt); +} + +/* Return gsi for stmt that occurs later. */ + +gimple_stmt_iterator +pass_divmod::get_later_stmt (const basic_block& top_bb, gimple *stmt1, gimple *stmt2) +{ + gimple_stmt_iterator gsi; + + for (gsi = gsi_after_labels (top_bb); !gsi_end_p (gsi); gsi_next (&gsi)) + if (gsi_stmt (gsi) == stmt1) + return gsi_for_stmt (stmt2); + else if (gsi_stmt (gsi) == stmt2) + return gsi_for_stmt (stmt1); + + gcc_unreachable (); +} + +/* Pass operates in two phases: + * a) Collect all stmts with TRUNC_DIV_EXPR/TRUNC_MOD_EXPR having same operands into vec stmts; + * b) Create divmod library call, and update statements in stmts vector to use the divmod's return value. + */ + +unsigned +pass_divmod::execute_divmod_1 (gimple *stmt) +{ + enum tree_code code = (gimple_assign_rhs_code (stmt) == TRUNC_DIV_EXPR) ? TRUNC_MOD_EXPR : TRUNC_DIV_EXPR; + + vec<gimple *> stmts = vNULL; + stmts.safe_push (stmt); + basic_block top_bb = gimple_bb (stmt); + + tree op1 = gimple_assign_rhs1 (stmt); + tree op2 = gimple_assign_rhs2 (stmt); + + use_operand_p use_p; + imm_use_iterator use_iter; + + FOR_EACH_IMM_USE_FAST (use_p, use_iter, op1) + { + gimple *use_stmt = USE_STMT (use_p); + if (is_gimple_assign (use_stmt) + && gimple_assign_rhs_code (use_stmt) == code) + { + tree u_op1 = gimple_assign_rhs1 (use_stmt); + tree u_op2 = gimple_assign_rhs2 (use_stmt); + + if ((operand_equal_p (op1, u_op1, 0) && operand_equal_p (op2, u_op2, 0)) + || (operand_equal_p (op1, u_op2, 0) && operand_equal_p (op2, u_op1, 0))) + maybe_record_stmt (stmts, top_bb, use_stmt); + } + } + + if (stmts.is_empty ()) + return false; + + /* Create the library call. */ + gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2); + tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (gimple_assign_lhs (stmt))), call_stmt, "divmod_tmp"); + gimple_call_set_lhs (call_stmt, res); + + divmod_stats.n_calls++; + + /* Insert the call-stmt. */ + gimple *def_stmt_op1 = SSA_NAME_DEF_STMT (op1); + bool op1_def_in_bb = + (!SSA_NAME_IS_DEFAULT_DEF (op1) + && gimple_code (def_stmt_op1) != GIMPLE_PHI + && gimple_bb (def_stmt_op1) == top_bb); + + gimple *def_stmt_op2 = SSA_NAME_DEF_STMT (op2); + bool op2_def_in_bb = + (!SSA_NAME_IS_DEFAULT_DEF (op2) + && gimple_code (def_stmt_op2) != GIMPLE_PHI + && gimple_bb (def_stmt_op2) == top_bb); + + /* 3 cases arise where to insert the call to divmod depending upon where op1 and op2 are defined wrt top_bb: + * Case 1: When both def are outside top_bb, simply insert after labels */ + if (!op1_def_in_bb && !op2_def_in_bb) + { + gcc_assert (top_bb != 0); + gimple_stmt_iterator gsi = gsi_after_labels (top_bb); + gsi_insert_before (&gsi, call_stmt, GSI_SAME_STMT); + } + + /* Case 2: When both are in top_bb */ + else if (op1_def_in_bb && op2_def_in_bb) + { + /* FIXME: better way to compare iterators?. */ + gimple_stmt_iterator gsi = get_later_stmt (top_bb, def_stmt_op1, def_stmt_op2); + gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); + } + + /* Case 3: When one def is inside top_bb and one is outside. */ + else + { + gimple_stmt_iterator gsi; + + if (op1_def_in_bb) + gsi = gsi_for_stmt (def_stmt_op1); + else + gsi = gsi_for_stmt (def_stmt_op2); + + gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); + } + + /* Update stmts. */ + bool cfg_changed = false; + gimple *use_stmt; + for (unsigned i = 0; i < stmts.length (); ++i) + { + tree rhs; + use_stmt = stmts[i]; + + switch (gimple_assign_rhs_code (use_stmt)) + { + case TRUNC_DIV_EXPR: + rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res); + break; + + case TRUNC_MOD_EXPR: + rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res); + break; + + default: + gcc_unreachable (); + } + + gassign *assign_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt), rhs); + gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); + gsi_replace (&gsi, assign_stmt, true); + if (gimple_purge_dead_eh_edges (gimple_bb (assign_stmt))) + cfg_changed = true; + + divmod_stats.n_used++; + } + + stmts.release (); + return cfg_changed; +} + +unsigned +pass_divmod::execute (function *fun) +{ + calculate_dominance_info (CDI_DOMINATORS); + basic_block bb; + + bool cfg_changed = false; + + FOR_EACH_BB_FN (bb, fun) + { + for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (is_gimple_assign (stmt) + && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_binary) + { + if (gimple_assign_rhs_code (stmt) == TRUNC_DIV_EXPR) + cfg_changed |= execute_divmod_1 (stmt); + } + } + } + + statistics_counter_event (fun, "number of divmod calls inserted", divmod_stats.n_calls); + statistics_counter_event (fun, "number of divmod calls used", divmod_stats.n_used); + + free_dominance_info (CDI_DOMINATORS); + return cfg_changed ? TODO_cleanup_cfg : 0; +} + +} // anon namespace + +gimple_opt_pass * +make_pass_divmod (gcc::context *ctxt) +{ + return new pass_divmod (ctxt); +}