I am working on support for a future processor, and I noticed that when I did
the initial power7 work in 2009, I ordered the DF moves so that the VSX
moves came before the traditional floating point moves.

If reload needs to reload a floating point register, it will match first on the
VSX instructions and generate LXSDX or STXSDX instead of the traditional
LFD/LFDX and STFD/STFDX instructions.  Because the LXSDX/STXSDX instructions
only support the REG+REG (indexed) addressing form, reload needs to generate
the stack offset in a GPR and use that.  Note, for normal loads/stores, the
register allocator will see if there
are other options, and eventually match against the traditional floating point
load and store.  Reload, however, seems to stop as soon as it finds an
appropriate instruction.

The following patch reorders the movdf patterns so that first the traditional
floating point registers are considered, then the VSX registers, and finally
the general purpose registers.  I have bootstrapped the compiler with these
changes, and had no regressions in the testsuite.

I also ran the spec 2006 benchmark suite with/without these patches (using
subversion id 193503 as the base).  There were no slow downs that were outside
of the normal range that I consider to be noise level (2%).  The 447.dealII
benchmark sped up by 14% (456.hmmer and 471.omnetpp sped up by 2%).

Are these patches ok to apply?

2012-11-19  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        * config/rs6000/rs6000.md (movdf_hardfloat32): Reorder move
        constraints so that the traditional floating point loads, stores,
        and moves are done first, then the VSX loads, stores, and moves,
        and finally the GPR loads, stores, and moves so that reload
        chooses FPRs over GPRs, and uses the traditional load/store
        instructions which provide an offset.
        (movdf_hardfloat64): Likewise.

-- 
Michael Meissner, IBM
5 Technology Place Drive, M/S 2757, Westford, MA 01886-3141, USA
meiss...@linux.vnet.ibm.com     fax +1 (978) 399-6899
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md (revision 193635)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -8019,46 +8019,30 @@ (define_split
 ;; less efficient than loading the constant into an FP register, since
 ;; it will probably be used there.
 (define_insn "*movdf_hardfloat32"
-  [(set (match_operand:DF 0 "nonimmediate_operand" 
"=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,m,d,d,wa,!r,!r,!r")
-       (match_operand:DF 1 "input_operand" 
"r,Y,r,ws,wa,Z,Z,ws,wa,d,m,d,j,G,H,F"))]
+  [(set (match_operand:DF 0 "nonimmediate_operand" 
"=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,!r,!r,!r")
+       (match_operand:DF 1 "input_operand" 
"d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,G,H,F"))]
   "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && 
TARGET_DOUBLE_FLOAT 
    && (gpc_reg_operand (operands[0], DFmode)
        || gpc_reg_operand (operands[1], DFmode))"
-  "*
-{
-  switch (which_alternative)
-    {
-    default:
-      gcc_unreachable ();
-    case 0:
-    case 1:
-    case 2:
-      return \"#\";
-    case 3:
-    case 4:
-      return \"xxlor %x0,%x1,%x1\";
-    case 5:
-    case 6:
-      return \"lxsd%U1x %x0,%y1\";
-    case 7:
-    case 8:
-      return \"stxsd%U0x %x1,%y0\";
-    case 9:
-      return \"stfd%U0%X0 %1,%0\";
-    case 10:
-      return \"lfd%U1%X1 %0,%1\";
-    case 11:
-      return \"fmr %0,%1\";
-    case 12:
-      return \"xxlxor %x0,%x0,%x0\";
-    case 13:
-    case 14:
-    case 15:
-      return \"#\";
-    }
-}"
-  [(set_attr "type" 
"store,load,two,fp,fp,fpload,fpload,fpstore,fpstore,fpstore,fpload,fp,vecsimple,*,*,*")
-   (set_attr "length" "8,8,8,4,4,4,4,4,4,4,4,4,4,8,12,16")])
+  "@
+   stfd%U0%X0 %1,%0
+   lfd%U1%X1 %0,%1
+   fmr %0,%1
+   lxsd%U1x %x0,%y1
+   lxsd%U1x %x0,%y1
+   stxsd%U0x %x1,%y0
+   stxsd%U0x %x1,%y0
+   xxlor %x0,%x1,%x1
+   xxlor %x0,%x1,%x1
+   xxlxor %x0,%x0,%x0
+   #
+   #
+   #
+   #
+   #
+   #"
+  [(set_attr "type" 
"fpstore,fpload,fp,fpload,fpload,fpstore,fpstore,vecsimple,vecsimple,vecsimple,store,load,two,fp,fp,*")
+   (set_attr "length" "4,4,4,4,4,4,4,4,4,4,8,8,8,8,12,16")])
 
 (define_insn "*movdf_softfloat32"
   [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,r,r,r,r")
@@ -8131,25 +8115,25 @@ (define_insn "*movdf_hardfloat64_mfpgpr"
 ; ld/std require word-aligned displacements -> 'Y' constraint.
 ; List Y->r and r->Y before r->r for reload.
 (define_insn "*movdf_hardfloat64"
-  [(set (match_operand:DF 0 "nonimmediate_operand" 
"=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,m,d,d,wa,*c*l,!r,*h,!r,!r,!r")
-       (match_operand:DF 1 "input_operand" 
"r,Y,r,ws,wa,Z,Z,ws,wa,d,m,d,j,r,h,0,G,H,F"))]
+  [(set (match_operand:DF 0 "nonimmediate_operand" 
"=m,d,d,Y,r,!r,ws,?wa,Z,?Z,ws,?wa,wa,*c*l,!r,*h,!r,!r,!r")
+       (match_operand:DF 1 "input_operand" 
"d,m,d,r,Y,r,Z,Z,ws,wa,ws,wa,j,r,h,0,G,H,F"))]
   "TARGET_POWERPC64 && !TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS 
    && TARGET_DOUBLE_FLOAT
    && (gpc_reg_operand (operands[0], DFmode)
        || gpc_reg_operand (operands[1], DFmode))"
   "@
+   stfd%U0%X0 %1,%0
+   lfd%U1%X1 %0,%1
+   fmr %0,%1
    std%U0%X0 %1,%0
    ld%U1%X1 %0,%1
    mr %0,%1
-   xxlor %x0,%x1,%x1
-   xxlor %x0,%x1,%x1
    lxsd%U1x %x0,%y1
    lxsd%U1x %x0,%y1
    stxsd%U0x %x1,%y0
    stxsd%U0x %x1,%y0
-   stfd%U0%X0 %1,%0
-   lfd%U1%X1 %0,%1
-   fmr %0,%1
+   xxlor %x0,%x1,%x1
+   xxlor %x0,%x1,%x1
    xxlxor %x0,%x0,%x0
    mt%0 %1
    mf%1 %0
@@ -8157,7 +8141,7 @@ (define_insn "*movdf_hardfloat64"
    #
    #
    #"
-  [(set_attr "type" 
"store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fpstore,fpload,fp,vecsimple,mtjmpr,mfjmpr,*,*,*,*")
+  [(set_attr "type" 
"fpstore,fpload,fp,store,load,*,fpload,fpload,fpstore,fpstore,vecsimple,vecsimple,vecsimple,mtjmpr,mfjmpr,*,*,*,*")
    (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16")])
 
 (define_insn "*movdf_softfloat64"

Reply via email to