I am working on support for a future processor, and I noticed that when I did the initial power7 work in 2009, I ordered the DF moves so that the VSX moves came before the traditional floating point moves.
If reload needs to reload a floating point register, it will match first on the VSX instructions and generate LXSDX or STXSDX instead of the traditional LFD/LFDX and STFD/STFDX instructions. Because the LXSDX/STXSDX instructions are only REG+REG, reload needs to generate the stack offset in a GPR and use this. Note, for normal loads/stores, the register allocator will see if there are other options, and eventually match against the traditional floating point load and store. Reload, however, seems to stop as soon as it finds an appropriate instruction. The following patch reorders the movdf patterns so that first the traditional floating point registers are considered, then the VSX registers, and finally the general purpose registers. I have bootstrapped the compiler with these changes, and had no regressions in the testsuite. I also ran the spec 2006 benchmark suite with/without these patches (using subversion id 193503 as the base). There were no slowdowns that were outside of the normal range that I consider to be noise level (2%). The 447.dealII benchmark sped up by 14% (456.hmmer and 471.omnetpp sped up by 2%). Are these patches ok to apply? 2012-11-19 Michael Meissner <meiss...@linux.vnet.ibm.com> * config/rs6000/rs6000.md (movdf_hardfloat32): Reorder move constraints so that the traditional floating point loads, stores, and moves are done first, then the VSX loads, stores, and moves, and finally the GPR loads, stores, and moves so that reload chooses FPRs over GPRs, and uses the traditional load/store instructions which provide an offset. (movdf_hardfloat64): Likewise. -- Michael Meissner, IBM 5 Technology Place Drive, M/S 2757, Westford, MA 01886-3141, USA meiss...@linux.vnet.ibm.com fax +1 (978) 399-6899
Index: gcc/config/rs6000/rs6000.md =================================================================== --- gcc/config/rs6000/rs6000.md (revision 193635) +++ gcc/config/rs6000/rs6000.md (working copy) @@ -8019,46 +8019,30 @@ (define_split ;; less efficient than loading the constant into an FP register, since ;; it will probably be used there. (define_insn "*movdf_hardfloat32" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,m,d,d,wa,!r,!r,!r") - (match_operand:DF 1 "input_operand" "r,Y,r,ws,wa,Z,Z,ws,wa,d,m,d,j,G,H,F"))] + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,!r,!r,!r") + (match_operand:DF 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,G,H,F"))] "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && (gpc_reg_operand (operands[0], DFmode) || gpc_reg_operand (operands[1], DFmode))" - "* -{ - switch (which_alternative) - { - default: - gcc_unreachable (); - case 0: - case 1: - case 2: - return \"#\"; - case 3: - case 4: - return \"xxlor %x0,%x1,%x1\"; - case 5: - case 6: - return \"lxsd%U1x %x0,%y1\"; - case 7: - case 8: - return \"stxsd%U0x %x1,%y0\"; - case 9: - return \"stfd%U0%X0 %1,%0\"; - case 10: - return \"lfd%U1%X1 %0,%1\"; - case 11: - return \"fmr %0,%1\"; - case 12: - return \"xxlxor %x0,%x0,%x0\"; - case 13: - case 14: - case 15: - return \"#\"; - } -}" - [(set_attr "type" "store,load,two,fp,fp,fpload,fpload,fpstore,fpstore,fpstore,fpload,fp,vecsimple,*,*,*") - (set_attr "length" "8,8,8,4,4,4,4,4,4,4,4,4,4,8,12,16")]) + "@ + stfd%U0%X0 %1,%0 + lfd%U1%X1 %0,%1 + fmr %0,%1 + lxsd%U1x %x0,%y1 + lxsd%U1x %x0,%y1 + stxsd%U0x %x1,%y0 + stxsd%U0x %x1,%y0 + xxlor %x0,%x1,%x1 + xxlor %x0,%x1,%x1 + xxlxor %x0,%x0,%x0 + # + # + # + # + # + #" + [(set_attr "type" "fpstore,fpload,fp,fpload,fpload,fpstore,fpstore,vecsimple,vecsimple,vecsimple,store,load,two,fp,fp,*") + (set_attr "length" "4,4,4,4,4,4,4,4,4,4,8,8,8,8,12,16")]) (define_insn "*movdf_softfloat32" 
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,r,r,r,r") @@ -8131,25 +8115,25 @@ (define_insn "*movdf_hardfloat64_mfpgpr" ; ld/std require word-aligned displacements -> 'Y' constraint. ; List Y->r and r->Y before r->r for reload. (define_insn "*movdf_hardfloat64" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,m,d,d,wa,*c*l,!r,*h,!r,!r,!r") - (match_operand:DF 1 "input_operand" "r,Y,r,ws,wa,Z,Z,ws,wa,d,m,d,j,r,h,0,G,H,F"))] + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,d,d,Y,r,!r,ws,?wa,Z,?Z,ws,?wa,wa,*c*l,!r,*h,!r,!r,!r") + (match_operand:DF 1 "input_operand" "d,m,d,r,Y,r,Z,Z,ws,wa,ws,wa,j,r,h,0,G,H,F"))] "TARGET_POWERPC64 && !TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && (gpc_reg_operand (operands[0], DFmode) || gpc_reg_operand (operands[1], DFmode))" "@ + stfd%U0%X0 %1,%0 + lfd%U1%X1 %0,%1 + fmr %0,%1 std%U0%X0 %1,%0 ld%U1%X1 %0,%1 mr %0,%1 - xxlor %x0,%x1,%x1 - xxlor %x0,%x1,%x1 lxsd%U1x %x0,%y1 lxsd%U1x %x0,%y1 stxsd%U0x %x1,%y0 stxsd%U0x %x1,%y0 - stfd%U0%X0 %1,%0 - lfd%U1%X1 %0,%1 - fmr %0,%1 + xxlor %x0,%x1,%x1 + xxlor %x0,%x1,%x1 xxlxor %x0,%x0,%x0 mt%0 %1 mf%1 %0 @@ -8157,7 +8141,7 @@ (define_insn "*movdf_hardfloat64" # # #" - [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fpstore,fpload,fp,vecsimple,mtjmpr,mfjmpr,*,*,*,*") + [(set_attr "type" "fpstore,fpload,fp,store,load,*,fpload,fpload,fpstore,fpstore,vecsimple,vecsimple,vecsimple,mtjmpr,mfjmpr,*,*,*,*") (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16")]) (define_insn "*movdf_softfloat64"