Hi, while looking into cunroll issues I noticed that we miss quite a lot of important unrolling at -O2 for EON because we think it will increase code size. This is because we do not account for the fact that making an array index constant is a good thing. This tracks back to the fact that estimate_num_insns does not consider refs to be an expensive thing at all. This patch adds a simple cost model for those, accounting for the address arithmetic, and updates the inliner to take into account that inlining can eliminate them. Bootstrapped/regtested x86_64-linux, and tested on a few benchmarks (tramp3d, eon, mozilla); it generally decreases code size without measurable slowdowns. OK? Honza
This tracks back to the fact that estimate_num_insns do not consider refs to be expensive thing at all. This patch adds simple costmodel for those accounting the address arithmetics + updates inliner to take into account when inlining eliminate them. Bootstrapped/regtested x86_64-linux, tested on few benchmarks (tramp3d, eon, mozilla) that it generally decrease code size without measurable slowdowns OK? Honza * tree-inline.c (estimate_ref_cost): New function. (estimate_operand_cost): New function. (estimate_num_insns): Estimate operand costs for calls, returns, moves and asm statements. * tree-inline.h (estimate_ref_cost): Declare. * ipa-inline-analysis.c (account_address_costs): New function. (estimate_function_body_sizes): Use it. Index: tree-inline.c =================================================================== --- tree-inline.c (revision 192761) +++ tree-inline.c (working copy) @@ -3322,6 +3322,69 @@ estimate_move_cost (tree type) return ((size + MOVE_MAX_PIECES - 1) / MOVE_MAX_PIECES); } +/* Return cost of the reference REF. This is intended to primarily + handle address computations hidden in ARRAY_REFs and TARGET_MEM_REFs. + Do not dive recursively, the caller is supposed to walk all the + handled components. This is so to make it easy for other code to + compensate the cost when some of references becomes constant. */ + +int +estimate_ref_cost (tree ref) +{ + int cost = 0; + + switch (TREE_CODE (ref)) + { + case MEM_REF: + /* One variable addition may be needed. 
*/ + if (TREE_CODE (TREE_OPERAND (ref, 1)) != INTEGER_CST) + cost++; + return cost; + + case TARGET_MEM_REF: + if (TREE_CODE (TMR_INDEX (ref)) != INTEGER_CST + || TREE_CODE (TMR_STEP (ref)) != INTEGER_CST) + cost++; + if ((TMR_INDEX2 (ref) + && TREE_CODE (TMR_INDEX2 (ref)) != INTEGER_CST) + || TREE_CODE (TMR_OFFSET (ref)) != INTEGER_CST) + cost++; + return cost; + + case ARRAY_REF: + case ARRAY_RANGE_REF: + if (TREE_CODE (TREE_OPERAND (ref, 1)) == INTEGER_CST + && (TREE_CODE (array_ref_low_bound (ref)) == INTEGER_CST) + && (TREE_CODE (array_ref_element_size (ref)) == INTEGER_CST)) + return 0; + /* Arrays of variable length objects are more costly by needing + arbitrary multiply. */ + if (TREE_CODE (TYPE_SIZE (TREE_TYPE (ref))) + == INTEGER_CST) + return 2; + else + return 1; + default: + return 0; + } +} + +/* For operands of load/stores estimate cost of the address computations + involved. */ + +static int +estimate_operand_cost (tree op) +{ + int cost = 0; + while (handled_component_p (op)) + { + cost += estimate_ref_cost (op); + op = TREE_OPERAND (op, 0); + } + /* account (TARGET_)MEM_REF. */ + return cost + estimate_ref_cost (op); +} + /* Returns cost of operation CODE, according to WEIGHTS */ static int @@ -3520,6 +3583,9 @@ estimate_num_insns (gimple stmt, eni_wei if (!is_gimple_reg (rhs) && !is_gimple_min_invariant (rhs)) cost += estimate_move_cost (TREE_TYPE (rhs)); + cost += estimate_operand_cost (lhs); + cost += estimate_operand_cost (rhs); + cost += estimate_operator_cost (gimple_assign_rhs_code (stmt), weights, gimple_assign_rhs1 (stmt), get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) @@ -3585,17 +3651,24 @@ estimate_num_insns (gimple stmt, eni_wei cost = node ? 
weights->call_cost : weights->indirect_call_cost; if (gimple_call_lhs (stmt)) - cost += estimate_move_cost (TREE_TYPE (gimple_call_lhs (stmt))); + { + cost += estimate_move_cost (TREE_TYPE (gimple_call_lhs (stmt))); + cost += estimate_operand_cost (gimple_call_lhs (stmt)); + } for (i = 0; i < gimple_call_num_args (stmt); i++) { tree arg = gimple_call_arg (stmt, i); + cost += estimate_operand_cost (arg); cost += estimate_move_cost (TREE_TYPE (arg)); } break; } case GIMPLE_RETURN: - return weights->return_cost; + return (weights->return_cost + + (gimple_return_retval (stmt) + ? estimate_operand_cost (gimple_return_retval (stmt)) + : 0)); case GIMPLE_GOTO: case GIMPLE_LABEL: @@ -3606,7 +3679,18 @@ estimate_num_insns (gimple stmt, eni_wei return 0; case GIMPLE_ASM: - return asm_str_count (gimple_asm_string (stmt)); + { + int cost = asm_str_count (gimple_asm_string (stmt)); + unsigned int i; + + for (i = 0; i < gimple_asm_ninputs (stmt); i++) + cost += estimate_operand_cost + (TREE_VALUE (gimple_asm_input_op (stmt, i))); + for (i = 0; i < gimple_asm_noutputs (stmt); i++) + cost += estimate_operand_cost + (TREE_VALUE (gimple_asm_output_op (stmt, i))); + return cost; + } case GIMPLE_RESX: /* This is either going to be an external function call with one Index: tree-inline.h =================================================================== --- tree-inline.h (revision 192761) +++ tree-inline.h (working copy) @@ -186,6 +186,7 @@ tree copy_decl_no_change (tree decl, cop int estimate_move_cost (tree type); int estimate_num_insns (gimple, eni_weights *); int estimate_num_insns_fn (tree, eni_weights *); +int estimate_ref_cost (tree); int count_insns_seq (gimple_seq, eni_weights *); bool tree_versionable_function_p (tree); Index: ipa-inline-analysis.c =================================================================== --- ipa-inline-analysis.c (revision 192761) +++ ipa-inline-analysis.c (working copy) @@ -2240,6 +2240,77 @@ predicate_for_phi_result (struct inline_ 
SSA_NAME_VERSION (gimple_phi_result (phi)), *p); } +/* Account costs related to the address operation + of OP executed with frequency FREQ with predicate P. + INFO and PARMS_INFO hold the function analyzed, NONCONSTANT_NAMES + the vector of known SSA names. + THIS_TIME/THIS_SIZE will be updated accordingly. */ +static void +account_address_costs (tree op, int freq, + struct inline_summary *info, + struct ipa_node_params *parms_info, + struct predicate *p, + VEC (predicate_t, heap) *nonconstant_names, + int &this_time, int &this_size) +{ + int cost; + while (handled_component_p (op)) + { + if ((TREE_CODE (op) == ARRAY_REF + || TREE_CODE (op) == ARRAY_RANGE_REF) + && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME + && (TREE_CODE (array_ref_low_bound (op)) == INTEGER_CST) + && (TREE_CODE (array_ref_element_size (op)) == INTEGER_CST) + && (cost = estimate_ref_cost (op)) != 0) + { + predicate ref_will_be_nonconstant; + if (parms_info) + { + ref_will_be_nonconstant + = will_be_nonconstant_expr_predicate + (parms_info, info, TREE_OPERAND (op, 1), + nonconstant_names); + ref_will_be_nonconstant + = and_predicates (info->conds, + &ref_will_be_nonconstant, + p); + } + else + ref_will_be_nonconstant = *p; + this_time -= cost; + this_size -= cost; + account_size_time (info, cost * 2, + cost * freq * 2, &ref_will_be_nonconstant); + } + op = TREE_OPERAND (op, 0); + } + if (TREE_CODE (op) == MEM_REF + && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME + && (cost = estimate_ref_cost (op)) != 0) + { + predicate ref_will_be_nonconstant; + if (parms_info) + { + ref_will_be_nonconstant + = will_be_nonconstant_expr_predicate + (parms_info, info, TREE_OPERAND (op, 1), + nonconstant_names); + ref_will_be_nonconstant + = and_predicates (info->conds, + &ref_will_be_nonconstant, + p); + } + else + ref_will_be_nonconstant = *p; + this_time -= cost; + this_size -= cost; + account_size_time (info, cost * 2, + cost * freq * 2, &ref_will_be_nonconstant); + } + gcc_checking_assert (this_time >= 0); + 
gcc_checking_assert (this_size >= 0); +} + /* Compute function body size parameters for NODE. When EARLY is true, we compute only simple summaries without non-trivial predicates to drive the early inliner. */ @@ -2351,11 +2422,14 @@ estimate_function_body_sizes (struct cgr fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n", ((double)freq)/CGRAPH_FREQ_BASE, this_size, this_time); } + time += this_time * freq; + size += this_size; if (is_gimple_call (stmt)) { struct cgraph_edge *edge = cgraph_edge (node, stmt); struct inline_edge_summary *es = inline_edge_summary (edge); + int i; /* Special case: results of BUILT_IN_CONSTANT_P will be always resolved as constant. We however don't want to optimize @@ -2387,13 +2461,26 @@ estimate_function_body_sizes (struct cgr } } + /* Account address costs separately. */ + if (gimple_call_lhs (stmt)) + account_address_costs (gimple_call_lhs (stmt), freq, info, + parms_info, &bb_predicate, + nonconstant_names, + this_time, this_size); + for (i = 0; i < (int) gimple_call_num_args (stmt); i++) + account_address_costs (gimple_call_arg (stmt, i), freq, + info, parms_info, &bb_predicate, + nonconstant_names, + this_time, this_size); + es->call_stmt_size = this_size; es->call_stmt_time = this_time; es->loop_depth = bb_loop_depth (bb); edge_set_predicate (edge, &bb_predicate); + continue; } - /* TODO: When conditional jump or swithc is known to be constant, but + /* TODO: When conditional jump or switch is known to be constant, but we did not translate it into the predicates, we really can account just maximum of the possible paths. 
*/ if (parms_info) @@ -2403,10 +2490,7 @@ estimate_function_body_sizes (struct cgr if (this_time || this_size) { struct predicate p; - - this_time *= freq; - time += this_time; - size += this_size; + unsigned int i; prob = eliminated_by_inlining_prob (stmt); if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS)) @@ -2420,6 +2504,42 @@ estimate_function_body_sizes (struct cgr else p = true_predicate (); + switch (gimple_code (stmt)) + { + case GIMPLE_ASSIGN: + account_address_costs (gimple_assign_lhs (stmt), freq, info, + parms_info, &p, nonconstant_names, + this_time, this_size); + if (gimple_assign_single_p (stmt)) + account_address_costs (gimple_assign_rhs1 (stmt), freq, info, + parms_info, &p, nonconstant_names, + this_time, this_size); + break; + case GIMPLE_RETURN: + if (gimple_return_retval (stmt)) + account_address_costs (gimple_return_retval (stmt), freq, info, + parms_info, &p, nonconstant_names, + this_time, this_size); + break; + case GIMPLE_ASM: + for (i = 0; i < gimple_asm_ninputs (stmt); i++) + account_address_costs (TREE_VALUE (gimple_asm_input_op + (stmt, i)), + freq, info, + parms_info, &p, nonconstant_names, + this_time, this_size); + for (i = 0; i < gimple_asm_noutputs (stmt); i++) + account_address_costs (TREE_VALUE (gimple_asm_output_op + (stmt, i)), + freq, info, + parms_info, &p, nonconstant_names, + this_time, this_size); + default: + break; + } + + this_time *= freq; + /* We account everything but the calls. Calls have their own size/time info attached to cgraph edges. This is necessary in order to make the cost disappear after inlining. */