As promised, this breaks out the changes to the IVOPTS cost model and the added function in double-int.c. Please let me know if you would rather see me attempt to consolidate the IVOPTS logic into expmed.c per Richard H's suggestion.
I ran into a glitch with multiply_by_const_cost. The original code declared a static htab_t in the function and allocated it on demand. When I tried adding a second one in the same manner, I ran into a locking problem in the memory management library code during a call to delete_htab. The original implementation seemed a bit dicey to me anyway, so I changed this to explicitly allocate and deallocate the hash tables on (entry to/exit from) IVOPTS. This reduces the scope of the hash table from a compilation unit to each individual function. If it's preferred to maintain compilation unit scope, then the initialization/finalization of the htabs can be pushed out to do_compile. But I doubt it's worth that. Bootstrapped and tested on powerpc64-unknown-linux-gnu with no new regressions. Ok for trunk? Thanks, Bill 2012-06-21 Bill Schmidt <wschm...@linux.ibm.com> * double-int.c (double_int_multiple_of): New function. * double-int.h (double_int_multiple_of): New decl. * tree-ssa-loop-ivopts.c (add_cost, zero_cost): Remove undefs. (mbc_entry_hash): New forward decl. (mbc_entry_eq): Likewise. (zero_cost): Change to no_cost. (mult_costs): New static var. (tree_ssa_iv_optimize_init): Initialize mult_costs. (add_cost): Change to add_regs_cost; distinguish costs by speed. (multiply_regs_cost): New function. (add_const_cost): Likewise. (extend_or_trunc_reg_cost): Likewise. (negate_reg_cost): Likewise. (multiply_by_cost): Change to multiply_by_const_cost; distinguish costs by speed. (get_address_cost): Change add_cost to add_regs_cost; change multiply_by_cost to multiply_by_const_cost. (force_expr_to_var_cost): Change zero_cost to no_cost; change add_cost to add_regs_cost; change multiply_by_cost to multiply_by_const_cost. (split_cost): Change zero_cost to no_cost. (ptr_difference_cost): Likewise. (difference_cost): Change zero_cost to no_cost; change multiply_by_cost to multiply_by_const_cost. (get_computation_cost_at): Change add_cost to add_regs_cost; change multiply_by_cost to multiply_by_const_cost. (determine_use_iv_cost_generic): Change zero_cost to no_cost. (determine_iv_cost): Change add_cost to add_regs_cost. (iv_ca_new): Change zero_cost to no_cost. (tree_ssa_iv_optimize_finalize): Release storage for mult_costs. * tree-ssa-address.c (most_expensive_mult_to_index): Change multiply_by_cost to multiply_by_const_cost. * tree-flow.h (multiply_by_cost): Change to multiply_by_const_cost. (add_regs_cost): New decl. (multiply_regs_cost): Likewise. (add_const_cost): Likewise. (extend_or_trunc_reg_cost): Likewise. (negate_reg_cost): Likewise. Index: gcc/double-int.c =================================================================== --- gcc/double-int.c (revision 188839) +++ gcc/double-int.c (working copy) @@ -865,6 +865,26 @@ double_int_umod (double_int a, double_int b, unsig return double_int_mod (a, b, true, code); } +/* Return TRUE iff PRODUCT is an integral multiple of FACTOR, and return + the multiple in *MULTIPLE. Otherwise return FALSE and leave *MULTIPLE + unchanged. */ + +bool +double_int_multiple_of (double_int product, double_int factor, + bool unsigned_p, double_int *multiple) +{ + double_int remainder; + double_int quotient = double_int_divmod (product, factor, unsigned_p, + TRUNC_DIV_EXPR, &remainder); + if (double_int_zero_p (remainder)) + { + *multiple = quotient; + return true; + } + + return false; +} + /* Set BITPOS bit in A. */ double_int double_int_setbit (double_int a, unsigned bitpos) Index: gcc/double-int.h =================================================================== --- gcc/double-int.h (revision 188839) +++ gcc/double-int.h (working copy) @@ -150,6 +150,8 @@ double_int double_int_divmod (double_int, double_i double_int double_int_sdivmod (double_int, double_int, unsigned, double_int *); double_int double_int_udivmod (double_int, double_int, unsigned, double_int *); +bool double_int_multiple_of (double_int, double_int, bool, double_int *); + double_int double_int_setbit (double_int, unsigned); int double_int_ctz (double_int); Index: gcc/tree-ssa-loop-ivopts.c =================================================================== --- gcc/tree-ssa-loop-ivopts.c (revision 188839) +++ gcc/tree-ssa-loop-ivopts.c (working copy) @@ -89,13 +89,11 @@ along with GCC; see the file COPYING3. If not see #include "target.h" #include "tree-inline.h" #include "tree-ssa-propagate.h" - -/* FIXME: add_cost and zero_cost defined in exprmed.h conflict with local uses. - */ #include "expmed.h" -#undef add_cost -#undef zero_cost +static hashval_t mbc_entry_hash (const void *); +static int mbc_entry_eq (const void*, const void *); + /* FIXME: Expressions are expanded to RTL in this pass to determine the cost of different addressing modes. This should be moved to a TBD interface between the GIMPLE and RTL worlds. */ @@ -162,7 +160,7 @@ typedef struct complex expressions and addressing modes). */ } comp_cost; -static const comp_cost zero_cost = {0, 0}; +static const comp_cost no_cost = {0, 0}; static const comp_cost infinite_cost = {INFTY, INFTY}; /* The candidate - cost pair. */ @@ -386,6 +384,9 @@ struct iv_ca_delta static VEC(tree,heap) *decl_rtl_to_reset; +/* Cached costs for multiplies by constants. */ +static htab_t mult_costs[2]; + static comp_cost force_expr_to_var_cost (tree, bool); /* Number of uses recorded in DATA. */ @@ -869,6 +870,9 @@ tree_ssa_iv_optimize_init (struct ivopts_data *dat htab_inv_expr_eq, free); data->inv_expr_id = 0; decl_rtl_to_reset = VEC_alloc (tree, heap, 20); + + mult_costs[0] = htab_create (100, mbc_entry_hash, mbc_entry_eq, free); + mult_costs[1] = htab_create (100, mbc_entry_hash, mbc_entry_eq, free); } /* Returns a memory object to that EXPR points. In case we are able to @@ -3057,16 +3061,19 @@ adjust_setup_cost (struct ivopts_data *data, unsig /* Returns cost of addition in MODE. */ -static unsigned -add_cost (enum machine_mode mode, bool speed) +unsigned +add_regs_cost (enum machine_mode mode, bool speed) { - static unsigned costs[NUM_MACHINE_MODES]; + static unsigned costs[NUM_MACHINE_MODES][2]; rtx seq; unsigned cost; - if (costs[mode]) - return costs[mode]; + if (speed) + speed = 1; + if (costs[mode][speed]) + return costs[mode][speed]; + start_sequence (); force_operand (gen_rtx_fmt_ee (PLUS, mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1), @@ -3079,7 +3086,7 @@ adjust_setup_cost (struct ivopts_data *data, unsig if (!cost) cost = 1; - costs[mode] = cost; + costs[mode][speed] = cost; if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Addition in %s costs %d\n", @@ -3087,6 +3094,160 @@ adjust_setup_cost (struct ivopts_data *data, unsig return cost; } +/* Returns cost of multiplication in MODE. */ + +unsigned +multiply_regs_cost (enum machine_mode mode, bool speed) +{ + static unsigned costs[NUM_MACHINE_MODES][2]; + rtx seq; + unsigned cost; + + if (speed) + speed = 1; + + if (costs[mode][speed]) + return costs[mode][speed]; + + start_sequence (); + force_operand (gen_rtx_fmt_ee (MULT, mode, + gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1), + gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)), + NULL_RTX); + seq = get_insns (); + end_sequence (); + + cost = seq_cost (seq, speed); + if (!cost) + cost = 1; + + costs[mode][speed] = cost; + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Multiplication in %s costs %d\n", + GET_MODE_NAME (mode), cost); + return cost; +} + +/* Returns cost of addition with a constant in MODE. */ + +unsigned +add_const_cost (enum machine_mode mode, bool speed) +{ + static unsigned costs[NUM_MACHINE_MODES][2]; + rtx seq; + unsigned cost; + + if (speed) + speed = 1; + + if (costs[mode][speed]) + return costs[mode][speed]; + + /* Arbitrarily generate insns for x + 2, as the exact constant + shouldn't matter. */ + start_sequence (); + force_operand (gen_rtx_fmt_ee (PLUS, mode, + gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1), + gen_int_mode (2, mode)), + NULL_RTX); + seq = get_insns (); + end_sequence (); + + cost = seq_cost (seq, speed); + if (!cost) + cost = 1; + + costs[mode][speed] = cost; + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Addition to constant in %s costs %d\n", + GET_MODE_NAME (mode), cost); + return cost; +} + +/* Returns cost of extend or truncate in MODE. */ + +unsigned +extend_or_trunc_reg_cost (tree type_to, tree type_from, bool speed) +{ + static unsigned costs[NUM_MACHINE_MODES][NUM_MACHINE_MODES][2]; + rtx seq; + unsigned cost; + enum machine_mode mode_to = TYPE_MODE (type_to); + enum machine_mode mode_from = TYPE_MODE (type_from); + tree size_to = TYPE_SIZE (type_to); + tree size_from = TYPE_SIZE (type_from); + enum rtx_code code; + + gcc_assert (TREE_CODE (size_to) == INTEGER_CST + && TREE_CODE (size_from) == INTEGER_CST); + + if (speed) + speed = 1; + + if (costs[mode_to][mode_from][speed]) + return costs[mode_to][mode_from][speed]; + + if (tree_int_cst_lt (size_to, size_from)) + code = TRUNCATE; + else if (TYPE_UNSIGNED (type_to)) + code = ZERO_EXTEND; + else + code = SIGN_EXTEND; + + start_sequence (); + gen_rtx_fmt_e (code, mode_to, + gen_raw_REG (mode_from, LAST_VIRTUAL_REGISTER + 1)); + seq = get_insns (); + end_sequence (); + + cost = seq_cost (seq, speed); + if (!cost) + cost = 1; + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Conversion from %s to %s costs %d\n", + GET_MODE_NAME (mode_to), GET_MODE_NAME (mode_from), cost); + + costs[mode_to][mode_from][speed] = cost; + return cost; +} + +/* Returns cost of negation in MODE. */ + +unsigned +negate_reg_cost (enum machine_mode mode, bool speed) +{ + static unsigned costs[NUM_MACHINE_MODES][2]; + rtx seq; + unsigned cost; + + if (speed) + speed = 1; + + if (costs[mode][speed]) + return costs[mode][speed]; + + start_sequence (); + force_operand (gen_rtx_fmt_e (NEG, mode, + gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1)), + NULL_RTX); + seq = get_insns (); + end_sequence (); + + cost = seq_cost (seq, speed); + if (!cost) + cost = 1; + + costs[mode][speed] = cost; + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Negation in %s costs %d\n", + GET_MODE_NAME (mode), cost); + return cost; +} + /* Entry in a hashtable of already known costs for multiplication. */ struct mbc_entry { @@ -3120,19 +3281,20 @@ mbc_entry_eq (const void *entry1, const void *entr /* Returns cost of multiplication by constant CST in MODE. */ unsigned -multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed) +multiply_by_const_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed) { - static htab_t costs; struct mbc_entry **cached, act; rtx seq; unsigned cost; - if (!costs) - costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free); + if (speed) + speed = 1; act.mode = mode; act.cst = cst; - cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT); + cached = (struct mbc_entry **) + htab_find_slot (mult_costs[speed], &act, INSERT); + if (*cached) return (*cached)->cost; @@ -3418,7 +3580,7 @@ get_address_cost (bool symbol_present, bool var_pr If VAR_PRESENT is true, try whether the mode with SYMBOL_PRESENT = false is cheaper even with cost of addition, and if this is the case, use it. */ - add_c = add_cost (address_mode, speed); + add_c = add_regs_cost (address_mode, speed); for (i = 0; i < 8; i++) { var_p = i & 1; @@ -3499,10 +3661,10 @@ get_address_cost (bool symbol_present, bool var_pr && multiplier_allowed_in_address_p (ratio, mem_mode, as)); if (ratio != 1 && !ratio_p) - cost += multiply_by_cost (ratio, address_mode, speed); + cost += multiply_by_const_cost (ratio, address_mode, speed); if (s_offset && !offset_p && !symbol_present) - cost += add_cost (address_mode, speed); + cost += add_regs_cost (address_mode, speed); if (may_autoinc) *may_autoinc = autoinc; @@ -3601,7 +3763,7 @@ force_expr_to_var_cost (tree expr, bool speed) STRIP_NOPS (expr); if (SSA_VAR_P (expr)) - return zero_cost; + return no_cost; if (is_gimple_min_invariant (expr)) { @@ -3633,12 +3795,12 @@ force_expr_to_var_cost (tree expr, bool speed) STRIP_NOPS (op1); if (is_gimple_val (op0)) - cost0 = zero_cost; + cost0 = no_cost; else cost0 = force_expr_to_var_cost (op0, speed); if (is_gimple_val (op1)) - cost1 = zero_cost; + cost1 = no_cost; else cost1 = force_expr_to_var_cost (op1, speed); @@ -3650,11 +3812,11 @@ force_expr_to_var_cost (tree expr, bool speed) op1 = NULL_TREE; if (is_gimple_val (op0)) - cost0 = zero_cost; + cost0 = no_cost; else cost0 = force_expr_to_var_cost (op0, speed); - cost1 = zero_cost; + cost1 = no_cost; break; default: @@ -3669,7 +3831,7 @@ force_expr_to_var_cost (tree expr, bool speed) case PLUS_EXPR: case MINUS_EXPR: case NEGATE_EXPR: - cost = new_cost (add_cost (mode, speed), 0); + cost = new_cost (add_regs_cost (mode, speed), 0); if (TREE_CODE (expr) != NEGATE_EXPR) { tree mult = NULL_TREE; @@ -3689,9 +3851,11 @@ force_expr_to_var_cost (tree expr, bool speed) case MULT_EXPR: if (cst_and_fits_in_hwi (op0)) - cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0); + cost = new_cost (multiply_by_const_cost (int_cst_value (op0), + mode, speed), 0); else if (cst_and_fits_in_hwi (op1)) - cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0); + cost = new_cost (multiply_by_const_cost (int_cst_value (op1), + mode, speed), 0); else return new_cost (target_spill_cost [speed], 0); break; @@ -3766,12 +3930,12 @@ split_address_cost (struct ivopts_data *data, { *symbol_present = true; *var_present = false; - return zero_cost; + return no_cost; } *symbol_present = false; *var_present = true; - return zero_cost; + return no_cost; } /* Estimates cost of expressing difference of addresses E1 - E2 as @@ -3796,7 +3960,7 @@ ptr_difference_cost (struct ivopts_data *data, *offset += diff; *symbol_present = false; *var_present = false; - return zero_cost; + return no_cost; } if (integer_zerop (e2)) @@ -3846,7 +4010,7 @@ difference_cost (struct ivopts_data *data, if (operand_equal_p (e1, e2, 0)) { *var_present = false; - return zero_cost; + return no_cost; } *var_present = true; @@ -3857,7 +4021,7 @@ difference_cost (struct ivopts_data *data, if (integer_zerop (e1)) { comp_cost cost = force_var_cost (data, e2, depends_on); - cost.cost += multiply_by_cost (-1, mode, data->speed); + cost.cost += multiply_by_const_cost (-1, mode, data->speed); return cost; } @@ -4168,7 +4332,7 @@ get_computation_cost_at (struct ivopts_data *data, &symbol_present, &var_present, &offset, depends_on)); cost.cost /= avg_loop_niter (data->current_loop); - cost.cost += add_cost (TYPE_MODE (ctype), data->speed); + cost.cost += add_regs_cost (TYPE_MODE (ctype), data->speed); } if (inv_expr_id) @@ -4201,7 +4365,7 @@ get_computation_cost_at (struct ivopts_data *data, if (!symbol_present && !var_present && !offset) { if (ratio != 1) - cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed); + cost.cost += multiply_by_const_cost (ratio, TYPE_MODE (ctype), speed); return cost; } @@ -4209,18 +4373,18 @@ get_computation_cost_at (struct ivopts_data *data, are added once to the variable, if present. */ if (var_present && (symbol_present || offset)) cost.cost += adjust_setup_cost (data, - add_cost (TYPE_MODE (ctype), speed)); + add_regs_cost (TYPE_MODE (ctype), speed)); /* Having offset does not affect runtime cost in case it is added to symbol, but it increases complexity. */ if (offset) cost.complexity++; - cost.cost += add_cost (TYPE_MODE (ctype), speed); + cost.cost += add_regs_cost (TYPE_MODE (ctype), speed); aratio = ratio > 0 ? ratio : -ratio; if (aratio != 1) - cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed); + cost.cost += multiply_by_const_cost (aratio, TYPE_MODE (ctype), speed); return cost; fallback: @@ -4277,7 +4441,7 @@ determine_use_iv_cost_generic (struct ivopts_data if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt) { - set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, + set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE, ERROR_MARK, -1); return true; } @@ -5066,7 +5230,7 @@ determine_iv_cost (struct ivopts_data *data, struc or a const set. */ if (cost_base.cost == 0) cost_base.cost = COSTS_N_INSNS (1); - cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed); + cost_step = add_regs_cost (TYPE_MODE (TREE_TYPE (base)), data->speed); cost = cost_step + adjust_setup_cost (data, cost_base.cost); @@ -5561,10 +5725,10 @@ iv_ca_new (struct ivopts_data *data) nw->cands = BITMAP_ALLOC (NULL); nw->n_cands = 0; nw->n_regs = 0; - nw->cand_use_cost = zero_cost; + nw->cand_use_cost = no_cost; nw->cand_cost = 0; nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1); - nw->cost = zero_cost; + nw->cost = no_cost; nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1); nw->num_used_inv_expr = 0; @@ -6638,6 +6802,8 @@ tree_ssa_iv_optimize_finalize (struct ivopts_data VEC_free (iv_use_p, heap, data->iv_uses); VEC_free (iv_cand_p, heap, data->iv_candidates); htab_delete (data->inv_expr_tab); + htab_delete (mult_costs[0]); + htab_delete (mult_costs[1]); } /* Returns true if the loop body BODY includes any function calls. */ Index: gcc/tree-ssa-address.c =================================================================== --- gcc/tree-ssa-address.c (revision 188839) +++ gcc/tree-ssa-address.c (working copy) @@ -556,7 +556,7 @@ most_expensive_mult_to_index (tree type, struct me || !multiplier_allowed_in_address_p (coef, TYPE_MODE (type), as)) continue; - acost = multiply_by_cost (coef, address_mode, speed); + acost = multiply_by_const_cost (coef, address_mode, speed); if (acost > best_mult_cost) { Index: gcc/tree-flow.h =================================================================== --- gcc/tree-flow.h (revision 188839) +++ gcc/tree-flow.h (working copy) @@ -810,7 +810,12 @@ bool expr_invariant_in_loop_p (struct loop *, tree bool stmt_invariant_in_loop_p (struct loop *, gimple); bool multiplier_allowed_in_address_p (HOST_WIDE_INT, enum machine_mode, addr_space_t); -unsigned multiply_by_cost (HOST_WIDE_INT, enum machine_mode, bool); +unsigned multiply_by_const_cost (HOST_WIDE_INT, enum machine_mode, bool); +unsigned add_regs_cost (enum machine_mode, bool); +unsigned multiply_regs_cost (enum machine_mode, bool); +unsigned add_const_cost (enum machine_mode, bool); +unsigned extend_or_trunc_reg_cost (tree, tree, bool); +unsigned negate_reg_cost (enum machine_mode, bool); bool may_be_nonaddressable_p (tree expr); /* In tree-ssa-threadupdate.c. */