This patch fixes the problem reported in PR 78101 where the power9-fusion
support generates an insn that isn't matched:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78101

It also fixes the bug that Andrew Stubbs reported.
https://gcc.gnu.org/ml/gcc-patches/2016-11/msg01367.html

There were two bugs in the code:

    1)  The fusion peephole for SFmode and DFmode was inconsistent about
        whether those values were allowed in GPRs if software floating point is
        used.

    2)  The power9 fusion store insn had an early clobber in the match_scratch.
        As a result, an address that uses a register, does ADDIS to add to the
        upper 16 bits, and then folds the lower ADDI into the store operation
        would not work if the address used the scratch register.

In addition to the two bugs, the fusion code had been written much earlier than
the support for the new ISA 3.0 scalar d-form (register+offset) instructions,
and the fusion code did not match these types of stores.  I have fixed this, so
that those memory references can also be fused.

I have bootstrapped the compiler with these changes and there were no
regressions on the following systems:
    1)  Little endian power8
    2)  Big endian power8 (no support for 32-bit libraries)
    3)  Big endian power7 (support for 32-bit libraries)

I have also built and run the SPEC 2006 CPU tests with this option enabled, and
they run fine, with some minor performance changes on power8 using power9
fusion.

I have built the cam4_r and cam4_s benchmarks of the next generation Spec (kit
102) and they now compile fine with -mpower9-fusion (they were the source of
the bug 78101).

Are these patches ok to check into the trunk?  Since the bug shows up in GCC
6.x, can I apply and submit these patches to the GCC 6.x branch?

[gcc]
2016-11-17  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        PR target/78101
        * config/rs6000/predicates.md (fusion_addis_mem_combo_load): Add
        the appropriate checks for SFmode/DFmode load/stores in GPR
        registers.
        (fusion_addis_mem_combo_store): Likewise.
        * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): Rename
        fusion_fpr_* to fusion_vsx_* and add in support for ISA 3.0 scalar
        d-form instructions for traditional Altivec registers.
        (emit_fusion_p9_load): Likewise.
        (emit_fusion_p9_store): Likewise.
        * config/rs6000/rs6000.md (p9 fusion store peephole2): Remove
        early clobber from scratch register.  Do not match if the register
        being stored is the scratch register.
        (fusion_vsx_<P:mode>_<FPR_FUSION:mode>_load): Rename fusion_fpr_*
        to fusion_vsx_* and add in support for ISA 3.0 scalar d-form
        instructions for traditional Altivec registers.
        (fusion_fpr_<P:mode>_<FPR_FUSION:mode>_load): Likewise.
        (fusion_vsx_<P:mode>_<FPR_FUSION:mode>_store): Likewise.
        (fusion_fpr_<P:mode>_<FPR_FUSION:mode>_store): Likewise.

[gcc/testsuite]
2016-11-17  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        PR target/78101
        * gcc.target/powerpc/fusion4.c: New test.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/predicates.md
===================================================================
--- gcc/config/rs6000/predicates.md     (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000)    (revision 242456)
+++ gcc/config/rs6000/predicates.md     (.../gcc/config/rs6000) (working copy)
@@ -1844,7 +1844,7 @@ (define_predicate "fusion_gpr_mem_load"
 ;; Match a GPR load (lbz, lhz, lwz, ld) that uses a combined address in the
 ;; memory field with both the addis and the memory offset.  Sign extension
 ;; is not handled here, since lha and lwa are not fused.
-;; With extended fusion, also match a FPR load (lfd, lfs) and float_extend
+;; With P9 fusion, also match a fpr/vector load and float_extend
 (define_predicate "fusion_addis_mem_combo_load"
   (match_code "mem,zero_extend,float_extend")
 {
@@ -1873,11 +1873,15 @@ (define_predicate "fusion_addis_mem_comb
       break;
 
     case SFmode:
-    case DFmode:
       if (!TARGET_P9_FUSION)
        return 0;
       break;
 
+    case DFmode:
+      if ((!TARGET_POWERPC64 && !TARGET_DF_FPR) || !TARGET_P9_FUSION)
+       return 0;
+      break;
+
     default:
       return 0;
     }
@@ -1920,6 +1924,7 @@ (define_predicate "fusion_addis_mem_comb
     case QImode:
     case HImode:
     case SImode:
+    case SFmode:
       break;
 
     case DImode:
@@ -1927,13 +1932,8 @@ (define_predicate "fusion_addis_mem_comb
        return 0;
       break;
 
-    case SFmode:
-      if (!TARGET_SF_FPR)
-       return 0;
-      break;
-
     case DFmode:
-      if (!TARGET_DF_FPR)
+      if (!TARGET_POWERPC64 && !TARGET_DF_FPR)
        return 0;
       break;
 
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c  (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000)    (revision 242456)
+++ gcc/config/rs6000/rs6000.c  (.../gcc/config/rs6000) (working copy)
@@ -3441,28 +3441,28 @@ rs6000_init_hard_regno_mode_ok (bool glo
 
       static const struct fuse_insns addis_insns[] = {
        { SFmode, DImode, RELOAD_REG_FPR,
-         CODE_FOR_fusion_fpr_di_sf_load,
-         CODE_FOR_fusion_fpr_di_sf_store },
+         CODE_FOR_fusion_vsx_di_sf_load,
+         CODE_FOR_fusion_vsx_di_sf_store },
 
        { SFmode, SImode, RELOAD_REG_FPR,
-         CODE_FOR_fusion_fpr_si_sf_load,
-         CODE_FOR_fusion_fpr_si_sf_store },
+         CODE_FOR_fusion_vsx_si_sf_load,
+         CODE_FOR_fusion_vsx_si_sf_store },
 
        { DFmode, DImode, RELOAD_REG_FPR,
-         CODE_FOR_fusion_fpr_di_df_load,
-         CODE_FOR_fusion_fpr_di_df_store },
+         CODE_FOR_fusion_vsx_di_df_load,
+         CODE_FOR_fusion_vsx_di_df_store },
 
        { DFmode, SImode, RELOAD_REG_FPR,
-         CODE_FOR_fusion_fpr_si_df_load,
-         CODE_FOR_fusion_fpr_si_df_store },
+         CODE_FOR_fusion_vsx_si_df_load,
+         CODE_FOR_fusion_vsx_si_df_store },
 
        { DImode, DImode, RELOAD_REG_FPR,
-         CODE_FOR_fusion_fpr_di_di_load,
-         CODE_FOR_fusion_fpr_di_di_store },
+         CODE_FOR_fusion_vsx_di_di_load,
+         CODE_FOR_fusion_vsx_di_di_store },
 
        { DImode, SImode, RELOAD_REG_FPR,
-         CODE_FOR_fusion_fpr_si_di_load,
-         CODE_FOR_fusion_fpr_si_di_store },
+         CODE_FOR_fusion_vsx_si_di_load,
+         CODE_FOR_fusion_vsx_si_di_store },
 
        { QImode, DImode, RELOAD_REG_GPR,
          CODE_FOR_fusion_gpr_di_qi_load,
@@ -3522,6 +3522,14 @@ rs6000_init_hard_regno_mode_ok (bool glo
 
          reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
          reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
+
+         if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR)
+           {
+             reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
+               = addis_insns[i].load;
+             reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
+               = addis_insns[i].store;
+           }
        }
     }
 
@@ -39817,6 +39825,15 @@ emit_fusion_p9_load (rtx reg, rtx mem, r
       else
        gcc_unreachable ();
     }
+  else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
+    {
+      if (mode == SFmode)
+       load_string = "lxssp";
+      else if (mode == DFmode || mode == DImode)
+       load_string = "lxsd";
+      else
+       gcc_unreachable ();
+    }
   else if (INT_REGNO_P (r))
     {
       switch (mode)
@@ -39895,6 +39912,15 @@ emit_fusion_p9_store (rtx mem, rtx reg, 
       else
        gcc_unreachable ();
     }
+  else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
+    {
+      if (mode == SFmode)
+       store_string = "stxssp";
+      else if (mode == DFmode || mode == DImode)
+       store_string = "stxsd";
+      else
+       gcc_unreachable ();
+    }
   else if (INT_REGNO_P (r))
     {
       switch (mode)
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000)    (revision 242456)
+++ gcc/config/rs6000/rs6000.md (.../gcc/config/rs6000) (working copy)
@@ -13438,7 +13438,8 @@ (define_peephole2
    (set (match_operand:SFDF 2 "offsettable_mem_operand" "")
        (match_operand:SFDF 3 "toc_fusion_or_p9_reg_operand" ""))]
   "TARGET_P9_FUSION && peep2_reg_dead_p (2, operands[0])
-   && fusion_p9_p (operands[0], operands[1], operands[2], operands[3])"
+   && fusion_p9_p (operands[0], operands[1], operands[2], operands[3])
+   && !rtx_equal_p (operands[0], operands[3])"
   [(const_int 0)]
 {
   expand_fusion_p9_store (operands);
@@ -13496,7 +13497,7 @@ (define_insn "fusion_gpr_<P:mode>_<GPR_F
        (unspec:GPR_FUSION
         [(match_operand:GPR_FUSION 1 "int_reg_operand" "r")]
         UNSPEC_FUSION_P9))
-   (clobber (match_operand:P 2 "base_reg_operand" "=&b"))]
+   (clobber (match_operand:P 2 "base_reg_operand" "=b"))]
   "TARGET_P9_FUSION"
 {
   return emit_fusion_p9_store (operands[0], operands[1], operands[2]);
@@ -13504,8 +13505,8 @@ (define_insn "fusion_gpr_<P:mode>_<GPR_F
   [(set_attr "type" "store")
    (set_attr "length" "8")])
 
-(define_insn "fusion_fpr_<P:mode>_<FPR_FUSION:mode>_load"
-  [(set (match_operand:FPR_FUSION 0 "fpr_reg_operand" "=d")
+(define_insn "fusion_vsx_<P:mode>_<FPR_FUSION:mode>_load"
+  [(set (match_operand:FPR_FUSION 0 "vsx_register_operand" "=dwb")
        (unspec:FPR_FUSION
         [(match_operand:FPR_FUSION 1 "fusion_addis_mem_combo_load" "wF")]
         UNSPEC_FUSION_P9))
@@ -13517,10 +13518,10 @@ (define_insn "fusion_fpr_<P:mode>_<FPR_F
   [(set_attr "type" "fpload")
    (set_attr "length" "8")])
 
-(define_insn "fusion_fpr_<P:mode>_<FPR_FUSION:mode>_store"
+(define_insn "fusion_vsx_<P:mode>_<FPR_FUSION:mode>_store"
   [(set (match_operand:FPR_FUSION 0 "fusion_addis_mem_combo_store" "=wF")
        (unspec:FPR_FUSION
-        [(match_operand:FPR_FUSION 1 "fpr_reg_operand" "d")]
+        [(match_operand:FPR_FUSION 1 "vsx_register_operand" "dwb")]
         UNSPEC_FUSION_P9))
    (clobber (match_operand:P 2 "base_reg_operand" "=b"))]
   "TARGET_P9_FUSION"
Index: gcc/testsuite/gcc.target/powerpc/fusion4.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/fusion4.c  (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc)     (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/fusion4.c  (.../gcc/testsuite/gcc.target/powerpc)  (revision 242499)
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { powerpc*-*-* && ilp32 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */
+/* { dg-options "-mcpu=power7 -mtune=power9 -O3 -msoft-float -m32" } */
+
+#define LARGE 0x12345
+
+float fusion_float_read (float *p){ return p[LARGE]; }
+
+void fusion_float_write (float *p, float f){ p[LARGE] = f; }
+
+/* { dg-final { scan-assembler "store fusion, type SF" } } */

Reply via email to