This patch uses PADDI (PLI) to load up 34-bit DImode constants. This is the same patch as V5 patch #8.
Along with the other patches, I have done bootstraps on a little endian power8 system, and there were no regressions in the test suite. I have built both Spec 2006 and Spec 2017 with all of these patches installed using -mcpu=future, and there were no failures. Can I check this into the trunk? Note, I may have limited email access on October 17th and 18th, 2019. 2019-10-15 Michael Meissner <meiss...@linux.ibm.com> * config/rs6000/rs6000.c (num_insns_constant_gpr): Add support for PADDI to load up and/or add 34-bit integer constants. (rs6000_rtx_costs): Treat constants loaded up with PADDI with the same cost as normal 16-bit constants. * config/rs6000/rs6000.md (movdi_internal64): Add support to load up 34-bit integer constants with PADDI. (movdi integer constant splitter): Add comment about PADDI. Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 277025) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -5524,7 +5524,7 @@ static int num_insns_constant_gpr (HOST_WIDE_INT value) { /* signed constant loadable with addi */ - if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000) + if (SIGNED_16BIT_OFFSET_P (value)) return 1; /* constant loadable with addis */ @@ -5532,6 +5532,10 @@ num_insns_constant_gpr (HOST_WIDE_INT va && (value >> 31 == -1 || value >> 31 == 0)) return 1; + /* PADDI can support up to 34 bit signed integers. */ + else if (TARGET_PREFIXED_ADDR && SIGNED_34BIT_OFFSET_P (value)) + return 1; + else if (TARGET_POWERPC64) { HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000; @@ -20641,7 +20645,8 @@ rs6000_rtx_costs (rtx x, machine_mode mo || outer_code == PLUS || outer_code == MINUS) && (satisfies_constraint_I (x) - || satisfies_constraint_L (x))) + || satisfies_constraint_L (x) + || satisfies_constraint_eI (x))) || (outer_code == AND && (satisfies_constraint_K (x) || (mode == SImode Index: gcc/config/rs6000/rs6000.md =================================================================== --- gcc/config/rs6000/rs6000.md (revision 277024) +++ gcc/config/rs6000/rs6000.md (working copy) @@ -8823,24 +8823,24 @@ (define_split DONE; }) -;; GPR store GPR load GPR move GPR li GPR lis GPR # -;; FPR store FPR load FPR move AVX store AVX store AVX load -;; AVX load VSX move P9 0 P9 -1 AVX 0/-1 VSX 0 -;; VSX -1 P9 const AVX const From SPR To SPR SPR<->SPR -;; VSX->GPR GPR->VSX +;; GPR store GPR load GPR move GPR li GPR lis GPR pli +;; GPR # FPR store FPR load FPR move AVX store AVX store +;; AVX load AVX load VSX move P9 0 P9 -1 AVX 0/-1 +;; VSX 0 VSX -1 P9 const AVX const From SPR To SPR +;; SPR<->SPR VSX->GPR GPR->VSX (define_insn "*movdi_internal64" [(set (match_operand:DI 0 "nonimmediate_operand" "=YZ, r, r, r, r, r, - m, ^d, ^d, wY, Z, $v, - $v, ^wa, wa, wa, v, wa, - wa, v, v, r, *h, *h, - ?r, ?wa") + r, m, ^d, ^d, wY, Z, + $v, $v, ^wa, wa, wa, v, + wa, wa, v, v, r, *h, + *h, ?r, ?wa") (match_operand:DI 1 "input_operand" - "r, YZ, r, I, L, nF, - ^d, m, ^d, ^v, $v, wY, - Z, ^wa, Oj, wM, OjwM, Oj, - wM, wS, wB, *h, r, 0, - wa, r"))] + "r, YZ, r, I, L, eI, + nF, ^d, m, ^d, ^v, $v, + wY, Z, ^wa, Oj, wM, OjwM, + Oj, wM, wS, wB, *h, r, + 0, wa, r"))] "TARGET_POWERPC64 && (gpc_reg_operand (operands[0], DImode) || gpc_reg_operand (operands[1], DImode))" @@ -8850,6 +8850,7 @@ (define_insn "*movdi_internal64" mr %0,%1 li %0,%1 lis %0,%v1 + li %0,%1 # stfd%U0%X0 %1,%0 lfd%U1%X1 %0,%1 @@ -8873,26 +8874,28 @@ (define_insn "*movdi_internal64" mtvsrd %x0,%1" [(set_attr "type" "store, load, *, *, *, *, - fpstore, fpload, fpsimple, fpstore, fpstore, fpload, - fpload, veclogical, vecsimple, vecsimple, vecsimple, veclogical, - veclogical, vecsimple, vecsimple, mfjmpr, mtjmpr, *, - mftgpr, mffgpr") + *, fpstore, fpload, fpsimple, fpstore, fpstore, + fpload, fpload, veclogical,vecsimple, vecsimple, vecsimple, + veclogical, veclogical, vecsimple, vecsimple, mfjmpr, mtjmpr, + *, mftgpr, mffgpr") (set_attr "size" "64") (set_attr "length" - "*, *, *, *, *, 20, - *, *, *, *, *, *, + "*, *, *, *, *, *, + 20, *, *, *, *, *, *, *, *, *, *, *, - *, 8, *, *, *, *, - *, *") + *, *, 8, *, *, *, + *, *, *") (set_attr "isa" - "*, *, *, *, *, *, - *, *, *, p9v, p7v, p9v, - p7v, *, p9v, p9v, p7v, *, - *, p7v, p7v, *, *, *, - p8v, p8v")]) + "*, *, *, *, *, fut, + *, *, *, *, p9v, p7v, + p9v, p7v, *, p9v, p9v, p7v, + *, *, p7v, p7v, *, *, + *, p8v, p8v")]) ; Some DImode loads are best done as a load of -1 followed by a mask -; instruction. +; instruction. On systems that support the PADDI (PLI) instruction, +; num_insns_constant returns 1, so these splitter would not be used for things +; that be loaded with PLI. (define_split [(set (match_operand:DI 0 "int_reg_operand_not_pseudo") (match_operand:DI 1 "const_int_operand"))] -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797