This is https://gcc.gnu.org/ml/gcc-patches/2019-03/msg01299.html with the fixes Segher requested, plus a few more:
- Delete the PREFERRED_RELOAD_CLASS changes.
- Adjust for the recent register renumbering.
- Use defines rather than hard-coding register numbers.
- Flip the altivec/float test when dealing with moves within vsx regs, so that the altivec hard reg count is preferred over the fp hard reg count when both reg types are possible.
- Use 2 for the power9 direct move cost, and remove more '?'s from insns.
- Use reg_class_subset_p in the test for slow LR/CTR moves.
Bootstrapped and regression tested powerpc64le-linux. OK for mainline? PR target/89271 * config/rs6000/rs6000.h (enum reg_class, REG_CLASS_NAMES), (REG_CLASS_CONTENTS): Add GEN_OR_VSX_REGS class. * config/rs6000/rs6000.c (rs6000_register_move_cost): Correct cost for general <-> vsx when direct moves are available. Cost union classes at minimal cost for any reg in the class. Correct calculation for moves between vsx, float, and altivec. Don't return a low cost for moves between special regs. Don't use hard coded register numbers. (TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS): Define. (rs6000_ira_change_pseudo_allocno_class): New function. * config/rs6000/rs6000.md (movsi_internal1, mov<mode>_internal), (movdi_internal32, movdi_internal64): Remove '*' from vsx register alternatives. (movsi_internal1): Don't disparage vector alternatives. (mov<mode>_internal): Likewise, excepting alternative that will be split. * config/rs6000/vsx.md (vsx_splat_<mode>_reg): Don't disparage we <- b alternative. diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 5d5765d89b2..e7c63c263ae 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1729,6 +1729,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost #undef TARGET_MEMORY_MOVE_COST #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost +#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS +#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \ + rs6000_ira_change_pseudo_allocno_class #undef TARGET_CANNOT_COPY_INSN_P #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p #undef TARGET_RTX_COSTS @@ -34648,22 +34651,54 @@ rs6000_register_move_cost (machine_mode mode, reg_class_t from, reg_class_t to) { int ret; + reg_class_t rclass; if (TARGET_DEBUG_COST) dbg_cost_ctrl++; + /* If we have VSX, we can easily move between FPR or Altivec registers, + otherwise we can only easily move within classes. 
+ Do this first so we give best-case answers for union classes + containing both gprs and vsx regs. */ + HARD_REG_SET to_vsx, from_vsx; + COPY_HARD_REG_SET (to_vsx, reg_class_contents[to]); + AND_HARD_REG_SET (to_vsx, reg_class_contents[VSX_REGS]); + COPY_HARD_REG_SET (from_vsx, reg_class_contents[from]); + AND_HARD_REG_SET (from_vsx, reg_class_contents[VSX_REGS]); + if (!hard_reg_set_empty_p (to_vsx) + && !hard_reg_set_empty_p (from_vsx) + && (TARGET_VSX + || hard_reg_set_intersect_p (to_vsx, from_vsx))) + { + int reg = FIRST_FPR_REGNO; + if (TARGET_VSX + || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO) + && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO))) + reg = FIRST_ALTIVEC_REGNO; + ret = 2 * hard_regno_nregs (reg, mode); + } + /* Moves from/to GENERAL_REGS. */ - if (reg_classes_intersect_p (to, GENERAL_REGS) - || reg_classes_intersect_p (from, GENERAL_REGS)) + else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS)) + || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS))) { - reg_class_t rclass = from; - - if (! reg_classes_intersect_p (to, GENERAL_REGS)) - rclass = to; - if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS) - ret = (rs6000_memory_move_cost (mode, rclass, false) - + rs6000_memory_move_cost (mode, GENERAL_REGS, false)); + { + if (TARGET_DIRECT_MOVE) + { + if (rs6000_tune != PROCESSOR_POWER9) + ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode); + else + ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode); + /* SFmode requires a conversion when moving between gprs + and vsx. */ + if (mode == SFmode) + ret += 2; + } + else + ret = (rs6000_memory_move_cost (mode, rclass, false) + + rs6000_memory_move_cost (mode, GENERAL_REGS, false)); + } /* It's more expensive to move CR_REGS than CR0_REGS because of the shift. 
*/ @@ -34676,24 +34711,14 @@ rs6000_register_move_cost (machine_mode mode, || rs6000_tune == PROCESSOR_POWER7 || rs6000_tune == PROCESSOR_POWER8 || rs6000_tune == PROCESSOR_POWER9) - && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS)) - ret = 6 * hard_regno_nregs (0, mode); + && reg_class_subset_p (rclass, SPECIAL_REGS)) + ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode); else /* A move will cost one instruction per GPR moved. */ - ret = 2 * hard_regno_nregs (0, mode); + ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode); } - /* If we have VSX, we can easily move between FPR or Altivec registers. */ - else if (VECTOR_MEM_VSX_P (mode) - && reg_classes_intersect_p (to, VSX_REGS) - && reg_classes_intersect_p (from, VSX_REGS)) - ret = 2 * hard_regno_nregs (FIRST_FPR_REGNO, mode); - - /* Moving between two similar registers is just one instruction. */ - else if (reg_classes_intersect_p (to, from)) - ret = (FLOAT128_2REG_P (mode)) ? 4 : 2; - /* Everything else has to go through GENERAL_REGS. */ else ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to) @@ -34746,6 +34771,64 @@ rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass, return ret; } +/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS. + + The register allocator chooses GEN_OR_VSX_REGS for the allocno + class if GENERAL_REGS and VSX_REGS cost is lower than the memory + cost. This happens a lot when TARGET_DIRECT_MOVE makes the register + move cost between GENERAL_REGS and VSX_REGS low. + + It might seem reasonable to use a union class. After all, if usage + of vsr is low and gpr high, it might make sense to spill gpr to vsr + rather than memory. However, in cases where register pressure of + both is high, like the cactus_adm spec test, allowing + GEN_OR_VSX_REGS as the allocno class results in bad decisions in + the first scheduling pass. 
This is partly due to an allocno of + GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure + class, which gives too high a pressure for GENERAL_REGS and too low + for VSX_REGS. So, force a choice of the subclass here. + + The best class is also the union if GENERAL_REGS and VSX_REGS have + the same cost. In that case we do use GEN_OR_VSX_REGS as the + allocno class, since trying to narrow down the class by regno mode + is prone to error. For example, SImode is allowed in VSX regs and + in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect) + it would be wrong to choose an allocno of GENERAL_REGS based on + SImode. */ + +static reg_class_t +rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED, + reg_class_t allocno_class, + reg_class_t best_class) +{ + switch (allocno_class) + { + case GEN_OR_VSX_REGS: + /* best_class must be a subset of allocno_class. */ + gcc_checking_assert (best_class == GEN_OR_VSX_REGS + || best_class == GEN_OR_FLOAT_REGS + || best_class == VSX_REGS + || best_class == ALTIVEC_REGS + || best_class == FLOAT_REGS + || best_class == GENERAL_REGS + || best_class == BASE_REGS); + /* Use best_class but choose wider classes when copying from the + wider class to best_class is cheap. This mimics IRA choice + of allocno class. */ + if (best_class == BASE_REGS) + return GENERAL_REGS; + if (TARGET_VSX + && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS)) + return VSX_REGS; + return best_class; + + default: + break; + } + + return allocno_class; +} + /* Returns a code for a target-specific builtin that implements reciprocal of the function, or NULL_TREE if not available. 
*/ diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 14a9e199bc8..30a72dd5e55 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1141,6 +1141,7 @@ enum reg_class VRSAVE_REGS, VSCR_REGS, GEN_OR_FLOAT_REGS, + GEN_OR_VSX_REGS, LINK_REGS, CTR_REGS, LINK_OR_CTR_REGS, @@ -1169,6 +1170,7 @@ enum reg_class "VRSAVE_REGS", \ "VSCR_REGS", \ "GEN_OR_FLOAT_REGS", \ + "GEN_OR_VSX_REGS", \ "LINK_REGS", \ "CTR_REGS", \ "LINK_OR_CTR_REGS", \ @@ -1205,6 +1207,8 @@ enum reg_class { 0x00000000, 0x00000000, 0x00000000, 0x00002000 }, \ /* GEN_OR_FLOAT_REGS. */ \ { 0xffffffff, 0xffffffff, 0x00000000, 0x00004008 }, \ + /* GEN_OR_VSX_REGS. */ \ + { 0xffffffff, 0xffffffff, 0xffffffff, 0x00004008 }, \ /* LINK_REGS. */ \ { 0x00000000, 0x00000000, 0x00000000, 0x00000001 }, \ /* CTR_REGS. */ \ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 411d7f0d352..8da7aba4080 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -6830,10 +6830,10 @@ (define_insn "movsi_low" ;; MF%1 MT%0 NOP (define_insn "*movsi_internal1" [(set (match_operand:SI 0 "nonimmediate_operand" - "=r, r, r, ?*wI, ?*wH, - m, ?Z, ?Z, r, r, - r, ?*wIwH, ?*wJwK, ?*wJwK, ?*wu, - ?*wJwK, ?*wH, ?*wK, ?*wIwH, ?r, + "=r, r, r, wI, wH, + m, Z, Z, r, r, + r, wIwH, wJwK, wJwK, wu, + wJwK, wH, wK, wIwH, r, r, *h, *h") (match_operand:SI 1 "input_operand" @@ -7104,13 +7104,13 @@ (define_expand "mov<mode>" ;; MTVSRWZ MF%1 MT%1 NOP (define_insn "*mov<mode>_internal" [(set (match_operand:QHI 0 "nonimmediate_operand" - "=r, r, ?*wJwK, m, Z, r, - ?*wJwK, ?*wJwK, ?*wJwK, ?*wK, ?*wK, r, - ?*wJwK, r, *c*l, *h") + "=r, r, wJwK, m, Z, r, + wJwK, wJwK, wJwK, wK, ?wK, r, + wJwK, r, *c*l, *h") (match_operand:QHI 1 "input_operand" "r, m, Z, r, wJwK, i, - wJwK, O, wM, wB, wS, ?*wJwK, + wJwK, O, wM, wB, wS, wJwK, r, *h, r, 0"))] "gpc_reg_operand (operands[0], <MODE>mode) @@ -8671,8 +8671,8 @@ (define_insn "*movdi_internal32" [(set (match_operand:DI 0 
"nonimmediate_operand" "=Y, r, r, m, ^d, ^d, r, wY, Z, ^wb, $wv, ^wi, - *wo, *wo, *wv, *wi, *wi, *wv, - *wv") + wo, wo, wv, wi, *i, wv, + wv") (match_operand:DI 1 "input_operand" "r, Y, r, ^d, m, ^d, @@ -8751,9 +8751,9 @@ (define_insn "*movdi_internal64" [(set (match_operand:DI 0 "nonimmediate_operand" "=YZ, r, r, r, r, r, m, ^d, ^d, wY, Z, $wb, - $wv, ^wi, *wo, *wo, *wv, *wi, - *wi, *wv, *wv, r, *h, *h, - ?*r, ?*wg, ?*r, ?*wj") + $wv, ^wi, wo, wo, wv, wi, + wi, wv, wv, r, *h, *h, + ?r, ?wg, ?r, ?wj") (match_operand:DI 1 "input_operand" "r, YZ, r, I, L, nF, diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 607c0cd33f2..80434d10247 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -4106,7 +4106,7 @@ (define_expand "vsx_splat_<mode>" }) (define_insn "vsx_splat_<mode>_reg" - [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we") + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,we") (vec_duplicate:VSX_D (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))] "VECTOR_MEM_VSX_P (<MODE>mode)" -- Alan Modra Australia Development Lab, IBM