Hello Segher:

Here is the patch that uses xxlor instead of fmr where possible.
Performance results shows that fmr is better in power9 and 
power10 architectures whereas xxlor is better in power7 and
power 8 architectures. fmr is the only option before p7.

Incorporated review comments.

Bootstrapped and regtested on powerpc64-linux-gnu

Thanks & Regards
Ajit

rs6000: Use xxlor instead of fmr where possible

Replaces fmr with xxlor instruction for power7 and power8
architectures whereas for power9 and power10 keep fmr
instruction.

Perf measurement results:

Power9 fmr:  201,847,661 cycles.
Power9 xxlor: 201,877,78 cycles.
Power8 fmr: 200,901,043 cycles.
Power8 xxlor: 201,020,518 cycles.
Power7 fmr: 201,059,524 cycles.
Power7 xxlor: 201,042,851 cycles.

2023-10-10  Ajit Kumar Agarwal  <aagar...@linux.ibm.com>

gcc/ChangeLog:

        * config/rs6000/rs6000.md (*movdf_hardfloat64): Use xxlor for power7
        and power8 and fmr for power9 and power10.
---
 gcc/config/rs6000/rs6000.md | 45 ++++++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 16 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index d4337ce42a9..46982637d79 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -355,7 +355,7 @@
   (const (symbol_ref "(enum attr_cpu) rs6000_tune")))
 
 ;; The ISA we implement.
-(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10"
+(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p7p8v,p9,p9v,p9kf,p9tf,p10"
   (const_string "any"))
 
 ;; Is this alternative enabled for the current CPU/ISA/etc.?
@@ -403,6 +403,12 @@
      (and (eq_attr "isa" "p10")
          (match_test "TARGET_POWER10"))
      (const_int 1)
+
+     (and (eq_attr "isa" "p7p8v")
+         (match_test "rs6000_tune != PROCESSOR_POWER9
+                      && TARGET_VSX && !TARGET_P9_VECTOR"))
+     (const_int 1)
+
     ] (const_int 0)))
 
 ;; If this instruction is microcoded on the CELL processor
@@ -8551,27 +8557,29 @@
 
 (define_insn "*mov<mode>_hardfloat64"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
-           "=m,           d,          d,          <f64_p9>,   wY,
-             <f64_av>,    Z,          <f64_vsx>,  <f64_vsx>,  !r,
+           "=m,           d,          <f64_vsx>,  <f64_p9>,   wY,
+             <f64_av>,    Z,          wa,         <f64_vsx>,  !r,
              YZ,          r,          !r,         *c*l,       !r,
-            *h,           r,          <f64_dm>,   wa")
+            *h,           r,          <f64_dm>,   d,          wn,
+            wa")
        (match_operand:FMOVE64 1 "input_operand"
-            "d,           m,          d,          wY,         <f64_p9>,
-             Z,           <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
+            "d,           m,          <f64_vsx>,  wY,         <f64_p9>,
+             Z,           <f64_av>,   wa,         <zero_fp>,  <zero_fp>,
              r,           YZ,         r,          r,          *h,
-             0,           <f64_dm>,   r,          eP"))]
+             0,           <f64_dm>,   r,          d,          wn,
+             eP"))]
   "TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
   "@
    stfd%U0%X0 %1,%0
    lfd%U1%X1 %0,%1
-   fmr %0,%1
+   xxlor %x0,%x1,%x1
    lxsd %0,%1
    stxsd %1,%0
    lxsdx %x0,%y1
    stxsdx %x1,%y0
-   xxlor %x0,%x1,%x1
+   fmr %0,%1
    xxlxor %x0,%x0,%x0
    li %0,0
    std%U0%X0 %1,%0
@@ -8582,23 +8590,28 @@
    nop
    mfvsrd %0,%x1
    mtvsrd %x0,%1
+   fmr %0,%1
+   fmr %0,%1
    #"
   [(set_attr "type"
-            "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
+            "fpstore,     fpload,     veclogical, fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, integer,
              store,       load,       *,          mtjmpr,     mfjmpr,
-             *,           mfvsr,      mtvsr,      vecperm")
+             *,           mfvsr,      mtvsr,      fpsimple,   fpsimple,
+             vecperm")
    (set_attr "size" "64")
    (set_attr "isa"
-            "*,           *,          *,          p9v,        p9v,
-             p7v,         p7v,        *,          *,          *,
-             *,           *,          *,          *,          *,
-             *,           p8v,        p8v,        p10")
+            "*,          *,          p7p8v,       p9v,        p9v,
+             p7v,         p7v,        *,           *,          *,
+             *,           *,          *,           *,          *,
+             *,           p8v,        p8v,         *,          *,
+             p10")
    (set_attr "prefixed"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           *,          *,          *")])
+             *,           *,          *,          *,          *,
+             *")])
 
 ;;           STD      LD       MR      MT<SPR> MF<SPR> G-const
 ;;           H-const  F-const  Special
-- 
2.39.3

Reply via email to