When optimizing for size we shouldn't be using metrics based on speed, or vice versa. rtlanal.c:get_full_rtx_cost wants both speed and size metrics from rs6000_rtx_costs, independent of the global optimize_size.
Note that the patch changes param_simultaneous_prefetches, param_l1_cache_size, param_l1_cache_line_size and param_l2_cache_size, which were previously all set to zero for optimize_size. I think that was a bug. Those params are a function of the processor. * config/rs6000/rs6000.h (rs6000_cost): Don't declare. (struct processor_costs): Move to.. * config/rs6000/rs6000.c: ..here. (rs6000_cost): Make static. (rs6000_option_override_internal): Ignore optimize_size when setting up rs6000_cost. (rs6000_insn_cost): Take into account optimize_size here instead. (rs6000_emit_parity): Likewise. (rs6000_rtx_costs): Don't use rs6000_cost when !speed. diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index d455aa52427..14ecbad5df4 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -497,7 +497,26 @@ rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) /* Processor costs (relative to an add) */ -const struct processor_costs *rs6000_cost; +struct processor_costs { + const int mulsi; /* cost of SImode multiplication. */ + const int mulsi_const; /* cost of SImode multiplication by constant. */ + const int mulsi_const9; /* cost of SImode mult by short constant. */ + const int muldi; /* cost of DImode multiplication. */ + const int divsi; /* cost of SImode division. */ + const int divdi; /* cost of DImode division. */ + const int fp; /* cost of simple SFmode and DFmode insns. */ + const int dmul; /* cost of DFmode multiplication (and fmadd). */ + const int sdiv; /* cost of SFmode division (fdivs). */ + const int ddiv; /* cost of DFmode division (fdiv). */ + const int cache_line_size; /* cache line size in bytes. */ + const int l1_cache_size; /* size of l1 cache, in kilobytes. */ + const int l2_cache_size; /* size of l2 cache, in kilobytes. */ + const int simultaneous_prefetches; /* number of parallel prefetch + operations. */ + const int sfdf_convert; /* cost of SF->DF conversion. 
*/ +}; + +static const struct processor_costs *rs6000_cost; /* Instruction size costs on 32bit processors. */ static const @@ -4618,131 +4637,128 @@ rs6000_option_override_internal (bool global_init_p) } /* Initialize rs6000_cost with the appropriate target costs. */ - if (optimize_size) - rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost; - else - switch (rs6000_tune) - { - case PROCESSOR_RS64A: - rs6000_cost = &rs64a_cost; - break; + switch (rs6000_tune) + { + case PROCESSOR_RS64A: + rs6000_cost = &rs64a_cost; + break; - case PROCESSOR_MPCCORE: - rs6000_cost = &mpccore_cost; - break; + case PROCESSOR_MPCCORE: + rs6000_cost = &mpccore_cost; + break; - case PROCESSOR_PPC403: - rs6000_cost = &ppc403_cost; - break; + case PROCESSOR_PPC403: + rs6000_cost = &ppc403_cost; + break; - case PROCESSOR_PPC405: - rs6000_cost = &ppc405_cost; - break; + case PROCESSOR_PPC405: + rs6000_cost = &ppc405_cost; + break; - case PROCESSOR_PPC440: - rs6000_cost = &ppc440_cost; - break; + case PROCESSOR_PPC440: + rs6000_cost = &ppc440_cost; + break; - case PROCESSOR_PPC476: - rs6000_cost = &ppc476_cost; - break; + case PROCESSOR_PPC476: + rs6000_cost = &ppc476_cost; + break; - case PROCESSOR_PPC601: - rs6000_cost = &ppc601_cost; - break; + case PROCESSOR_PPC601: + rs6000_cost = &ppc601_cost; + break; - case PROCESSOR_PPC603: - rs6000_cost = &ppc603_cost; - break; + case PROCESSOR_PPC603: + rs6000_cost = &ppc603_cost; + break; - case PROCESSOR_PPC604: - rs6000_cost = &ppc604_cost; - break; + case PROCESSOR_PPC604: + rs6000_cost = &ppc604_cost; + break; - case PROCESSOR_PPC604e: - rs6000_cost = &ppc604e_cost; - break; + case PROCESSOR_PPC604e: + rs6000_cost = &ppc604e_cost; + break; - case PROCESSOR_PPC620: - rs6000_cost = &ppc620_cost; - break; + case PROCESSOR_PPC620: + rs6000_cost = &ppc620_cost; + break; - case PROCESSOR_PPC630: - rs6000_cost = &ppc630_cost; - break; + case PROCESSOR_PPC630: + rs6000_cost = &ppc630_cost; + break; - case PROCESSOR_CELL: - rs6000_cost = 
&ppccell_cost; - break; + case PROCESSOR_CELL: + rs6000_cost = &ppccell_cost; + break; - case PROCESSOR_PPC750: - case PROCESSOR_PPC7400: - rs6000_cost = &ppc750_cost; - break; + case PROCESSOR_PPC750: + case PROCESSOR_PPC7400: + rs6000_cost = &ppc750_cost; + break; - case PROCESSOR_PPC7450: - rs6000_cost = &ppc7450_cost; - break; + case PROCESSOR_PPC7450: + rs6000_cost = &ppc7450_cost; + break; - case PROCESSOR_PPC8540: - case PROCESSOR_PPC8548: - rs6000_cost = &ppc8540_cost; - break; + case PROCESSOR_PPC8540: + case PROCESSOR_PPC8548: + rs6000_cost = &ppc8540_cost; + break; - case PROCESSOR_PPCE300C2: - case PROCESSOR_PPCE300C3: - rs6000_cost = &ppce300c2c3_cost; - break; + case PROCESSOR_PPCE300C2: + case PROCESSOR_PPCE300C3: + rs6000_cost = &ppce300c2c3_cost; + break; - case PROCESSOR_PPCE500MC: - rs6000_cost = &ppce500mc_cost; - break; + case PROCESSOR_PPCE500MC: + rs6000_cost = &ppce500mc_cost; + break; - case PROCESSOR_PPCE500MC64: - rs6000_cost = &ppce500mc64_cost; - break; + case PROCESSOR_PPCE500MC64: + rs6000_cost = &ppce500mc64_cost; + break; - case PROCESSOR_PPCE5500: - rs6000_cost = &ppce5500_cost; - break; + case PROCESSOR_PPCE5500: + rs6000_cost = &ppce5500_cost; + break; - case PROCESSOR_PPCE6500: - rs6000_cost = &ppce6500_cost; - break; + case PROCESSOR_PPCE6500: + rs6000_cost = &ppce6500_cost; + break; - case PROCESSOR_TITAN: - rs6000_cost = &titan_cost; - break; + case PROCESSOR_TITAN: + rs6000_cost = &titan_cost; + break; - case PROCESSOR_POWER4: - case PROCESSOR_POWER5: - rs6000_cost = &power4_cost; - break; + case PROCESSOR_POWER4: + case PROCESSOR_POWER5: + rs6000_cost = &power4_cost; + break; - case PROCESSOR_POWER6: - rs6000_cost = &power6_cost; - break; + case PROCESSOR_POWER6: + rs6000_cost = &power6_cost; + break; - case PROCESSOR_POWER7: - rs6000_cost = &power7_cost; - break; + case PROCESSOR_POWER7: + rs6000_cost = &power7_cost; + break; - case PROCESSOR_POWER8: - rs6000_cost = &power8_cost; - break; + case PROCESSOR_POWER8: + 
rs6000_cost = &power8_cost; + break; - case PROCESSOR_POWER9: - case PROCESSOR_POWER10: - rs6000_cost = &power9_cost; - break; + case PROCESSOR_POWER9: + case PROCESSOR_POWER10: + rs6000_cost = &power9_cost; + break; - case PROCESSOR_PPCA2: - rs6000_cost = &ppca2_cost; - break; + case PROCESSOR_PPCA2: + rs6000_cost = &ppca2_cost; + break; - default: - gcc_unreachable (); - } + default: + gcc_unreachable (); + } if (global_init_p) { @@ -21438,15 +21454,17 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, case PLUS: case MINUS: - if (FLOAT_MODE_P (mode)) + if (speed && FLOAT_MODE_P (mode)) *total = rs6000_cost->fp; else *total = COSTS_N_INSNS (1); return false; case MULT: - if (CONST_INT_P (XEXP (x, 1)) - && satisfies_constraint_I (XEXP (x, 1))) + if (!speed) + *total = COSTS_N_INSNS (1); + else if (CONST_INT_P (XEXP (x, 1)) + && satisfies_constraint_I (XEXP (x, 1))) { if (INTVAL (XEXP (x, 1)) >= -256 && INTVAL (XEXP (x, 1)) <= 255) @@ -21465,7 +21483,9 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, return false; case FMA: - if (mode == SFmode) + if (!speed) + *total = COSTS_N_INSNS (1); + else if (mode == SFmode) *total = rs6000_cost->fp; else *total = rs6000_cost->dmul; @@ -21475,8 +21495,10 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, case MOD: if (FLOAT_MODE_P (mode)) { - *total = mode == DFmode ? rs6000_cost->ddiv - : rs6000_cost->sdiv; + if (!speed) + *total = COSTS_N_INSNS (1); + else + *total = mode == DFmode ? 
rs6000_cost->ddiv : rs6000_cost->sdiv; return false; } /* FALLTHRU */ @@ -21495,7 +21517,9 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, } else { - if (GET_MODE (XEXP (x, 1)) == DImode) + if (!speed) + *total = COSTS_N_INSNS (1); + else if (GET_MODE (XEXP (x, 1)) == DImode) *total = rs6000_cost->divdi; else *total = rs6000_cost->divsi; @@ -21587,7 +21611,9 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, if (outer_code == TRUNCATE && GET_CODE (XEXP (x, 0)) == MULT) { - if (mode == DImode) + if (!speed) + *total = COSTS_N_INSNS (1); + else if (mode == DImode) *total = rs6000_cost->muldi; else *total = rs6000_cost->mulsi; @@ -21622,11 +21648,16 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, case FIX: case UNSIGNED_FIX: case FLOAT_TRUNCATE: - *total = rs6000_cost->fp; + if (!speed) + *total = COSTS_N_INSNS (1); + else + *total = rs6000_cost->fp; return false; case FLOAT_EXTEND: - if (mode == DFmode) + if (!speed) + *total = COSTS_N_INSNS (1); + else if (mode == DFmode) *total = rs6000_cost->sfdf_convert; else *total = rs6000_cost->fp; @@ -21773,6 +21804,10 @@ rs6000_insn_cost (rtx_insn *insn, bool speed) n = length / 4; } + const struct processor_costs *proc_cost = rs6000_cost; + if (optimize_size) + proc_cost = TARGET_POWERPC64 ? 
&size64_cost : &size32_cost; + enum attr_type type = get_attr_type (insn); switch (type) @@ -21787,16 +21822,16 @@ rs6000_insn_cost (rtx_insn *insn, bool speed) switch (get_attr_size (insn)) { case SIZE_8: - cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9; + cost = COSTS_N_INSNS (n - 1) + proc_cost->mulsi_const9; break; case SIZE_16: - cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const; + cost = COSTS_N_INSNS (n - 1) + proc_cost->mulsi_const; break; case SIZE_32: - cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi; + cost = COSTS_N_INSNS (n - 1) + proc_cost->mulsi; break; case SIZE_64: - cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi; + cost = COSTS_N_INSNS (n - 1) + proc_cost->muldi; break; default: gcc_unreachable (); @@ -21806,10 +21841,10 @@ rs6000_insn_cost (rtx_insn *insn, bool speed) switch (get_attr_size (insn)) { case SIZE_32: - cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi; + cost = COSTS_N_INSNS (n - 1) + proc_cost->divsi; break; case SIZE_64: - cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi; + cost = COSTS_N_INSNS (n - 1) + proc_cost->divdi; break; default: gcc_unreachable (); @@ -21817,16 +21852,16 @@ rs6000_insn_cost (rtx_insn *insn, bool speed) break; case TYPE_FP: - cost = n * rs6000_cost->fp; + cost = n * proc_cost->fp; break; case TYPE_DMUL: - cost = n * rs6000_cost->dmul; + cost = n * proc_cost->dmul; break; case TYPE_SDIV: - cost = n * rs6000_cost->sdiv; + cost = n * proc_cost->sdiv; break; case TYPE_DDIV: - cost = n * rs6000_cost->ddiv; + cost = n * proc_cost->ddiv; break; case TYPE_SYNC: @@ -22388,7 +22423,7 @@ rs6000_emit_parity (rtx dst, rtx src) if (mode == SImode) { /* Is mult+shift >= shift+xor+shift+xor? */ - if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3)) + if (!optimize_size && rs6000_cost->mulsi_const >= COSTS_N_INSNS (3)) { rtx tmp1, tmp2, tmp3, tmp4; @@ -22411,7 +22446,7 @@ rs6000_emit_parity (rtx dst, rtx src) else { /* Is mult+shift >= shift+xor+shift+xor+shift+xor? 
*/ - if (rs6000_cost->muldi >= COSTS_N_INSNS (5)) + if (!optimize_size && rs6000_cost->muldi >= COSTS_N_INSNS (5)) { rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index bbd8060e143..9daf55adc02 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1869,29 +1869,6 @@ extern scalar_int_mode rs6000_pmode; #define REVERSE_CONDITION(CODE, MODE) rs6000_reverse_condition (MODE, CODE) -/* Target cpu costs. */ - -struct processor_costs { - const int mulsi; /* cost of SImode multiplication. */ - const int mulsi_const; /* cost of SImode multiplication by constant. */ - const int mulsi_const9; /* cost of SImode mult by short constant. */ - const int muldi; /* cost of DImode multiplication. */ - const int divsi; /* cost of SImode division. */ - const int divdi; /* cost of DImode division. */ - const int fp; /* cost of simple SFmode and DFmode insns. */ - const int dmul; /* cost of DFmode multiplication (and fmadd). */ - const int sdiv; /* cost of SFmode division (fdivs). */ - const int ddiv; /* cost of DFmode division (fdiv). */ - const int cache_line_size; /* cache line size in bytes. */ - const int l1_cache_size; /* size of l1 cache, in kilobytes. */ - const int l2_cache_size; /* size of l2 cache, in kilobytes. */ - const int simultaneous_prefetches; /* number of parallel prefetch - operations. */ - const int sfdf_convert; /* cost of SF->DF conversion. */ -}; - -extern const struct processor_costs *rs6000_cost; - /* Control the assembler format that we output. */ /* A C string constant describing how to begin a comment in the target