This patch fixes the problem reported in PR 78101 where the power9-fusion support generates an insn that isn't matched: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78101
It also fixes the bug that Andrew Stubbs reported. https://gcc.gnu.org/ml/gcc-patches/2016-11/msg01367.html There were two bugs in the code: 1) The fusion peephole for SFmode and DFmode was inconsistant about whether those values were allowed in GPRs if software floating point is used. 2) The power9 fusion store insn had an early clobber in the match_scratch, which prevented having an address that uses a register, does ADDIS to add to the upper 16 bits, and then folds the lower ADDI into the store operation would not work if the address used the scratch register. In addition to the two bugs, the fusion code had been written much earlier than the support for the new ISA 3.0 scalar d-form (register+offset) instructions, and the fusion code did not match these types of stores. I have fixed this, so that those memory references can also be fused. I have bootstraped the compiler with these changes and there were no regressions on the following systems: 1) Little endian power8 2) Big endian power8 (no support for 32-bit libraries) 3) Big endian power7 (support for 32-bit libraries) I have also built and ran spec 2006 CPU tests with this option enabled, and they run fine, with some minor performance changes on power8 using power9 fusion. I have built the cam4_r and cam4_s benchmarks of the next generation Spec (kit 102) and they now compile fine with -mpower9-fusion (they were the source of the bug 78101). Are these patches ok to change into the trunk? Since the bug shows up in GCC 6.x, can I apply and submit these patches to the GCC 6.x branch? [gcc] 2016-11-17 Michael Meissner <meiss...@linux.vnet.ibm.com> PR target/78101 * config/rs6000/predicates.md (fusion_addis_mem_combo_load): Add the appropriate checks for SFmode/DFmode load/stores in GPR registers. (fusion_addis_mem_combo_store): Likewise. * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): Rename fusion_fpr_* to fusion_vsx_* and add in support for ISA 3.0 scalar d-form instructions for traditional Altivec registers. (emit_fusion_p9_load): Likewise. (emit_fusion_p9_store): Likewise. * config/rs6000/rs6000.md (p9 fusion store peephole2): Remove early clobber from scratch register. Do not match if the register being stored is the scratch register. (fusion_vsx_<P:mode>_<FPR_FUSION:mode>_load): Rename fusion_fpr_* to fusion_vsx_* and add in support for ISA 3.0 scalar d-form instructions for traditional Altivec registers. (fusion_fpr_<P:mode>_<FPR_FUSION:mode>_load): Likewise. (fusion_vsx_<P:mode>_<FPR_FUSION:mode>_store): Likewise. (fusion_fpr_<P:mode>_<FPR_FUSION:mode>_store): Likewise. [gcc/testsuite] 2016-11-17 Michael Meissner <meiss...@linux.vnet.ibm.com> PR target/78101 * gcc.target/powerpc/fusion4.c: New test. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/predicates.md =================================================================== --- gcc/config/rs6000/predicates.md (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 242456) +++ gcc/config/rs6000/predicates.md (.../gcc/config/rs6000) (working copy) @@ -1844,7 +1844,7 @@ (define_predicate "fusion_gpr_mem_load" ;; Match a GPR load (lbz, lhz, lwz, ld) that uses a combined address in the ;; memory field with both the addis and the memory offset. Sign extension ;; is not handled here, since lha and lwa are not fused. -;; With extended fusion, also match a FPR load (lfd, lfs) and float_extend +;; With P9 fusion, also match a fpr/vector load and float_extend (define_predicate "fusion_addis_mem_combo_load" (match_code "mem,zero_extend,float_extend") { @@ -1873,11 +1873,15 @@ (define_predicate "fusion_addis_mem_comb break; case SFmode: - case DFmode: if (!TARGET_P9_FUSION) return 0; break; + case DFmode: + if ((!TARGET_POWERPC64 && !TARGET_DF_FPR) || !TARGET_P9_FUSION) + return 0; + break; + default: return 0; } @@ -1920,6 +1924,7 @@ (define_predicate "fusion_addis_mem_comb case QImode: case HImode: case SImode: + case SFmode: break; case DImode: @@ -1927,13 +1932,8 @@ (define_predicate "fusion_addis_mem_comb return 0; break; - case SFmode: - if (!TARGET_SF_FPR) - return 0; - break; - case DFmode: - if (!TARGET_DF_FPR) + if (!TARGET_POWERPC64 && !TARGET_DF_FPR) return 0; break; Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 242456) +++ gcc/config/rs6000/rs6000.c (.../gcc/config/rs6000) (working copy) @@ -3441,28 +3441,28 @@ rs6000_init_hard_regno_mode_ok (bool glo static const struct fuse_insns addis_insns[] = { { SFmode, DImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_di_sf_load, - CODE_FOR_fusion_fpr_di_sf_store }, + CODE_FOR_fusion_vsx_di_sf_load, + CODE_FOR_fusion_vsx_di_sf_store }, { SFmode, SImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_si_sf_load, - CODE_FOR_fusion_fpr_si_sf_store }, + CODE_FOR_fusion_vsx_si_sf_load, + CODE_FOR_fusion_vsx_si_sf_store }, { DFmode, DImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_di_df_load, - CODE_FOR_fusion_fpr_di_df_store }, + CODE_FOR_fusion_vsx_di_df_load, + CODE_FOR_fusion_vsx_di_df_store }, { DFmode, SImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_si_df_load, - CODE_FOR_fusion_fpr_si_df_store }, + CODE_FOR_fusion_vsx_si_df_load, + CODE_FOR_fusion_vsx_si_df_store }, { DImode, DImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_di_di_load, - CODE_FOR_fusion_fpr_di_di_store }, + CODE_FOR_fusion_vsx_di_di_load, + CODE_FOR_fusion_vsx_di_di_store }, { DImode, SImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_si_di_load, - CODE_FOR_fusion_fpr_si_di_store }, + CODE_FOR_fusion_vsx_si_di_load, + CODE_FOR_fusion_vsx_si_di_store }, { QImode, DImode, RELOAD_REG_GPR, CODE_FOR_fusion_gpr_di_qi_load, @@ -3522,6 +3522,14 @@ rs6000_init_hard_regno_mode_ok (bool glo reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load; reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store; + + if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR) + { + reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX] + = addis_insns[i].load; + reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX] + = addis_insns[i].store; + } } } @@ -39817,6 +39825,15 @@ emit_fusion_p9_load (rtx reg, rtx mem, r else gcc_unreachable (); } + else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR) + { + if (mode == SFmode) + load_string = "lxssp"; + else if (mode == DFmode || mode == DImode) + load_string = "lxsd"; + else + gcc_unreachable (); + } else if (INT_REGNO_P (r)) { switch (mode) @@ -39895,6 +39912,15 @@ emit_fusion_p9_store (rtx mem, rtx reg, else gcc_unreachable (); } + else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR) + { + if (mode == SFmode) + store_string = "stxssp"; + else if (mode == DFmode || mode == DImode) + store_string = "stxsd"; + else + gcc_unreachable (); + } else if (INT_REGNO_P (r)) { switch (mode) Index: gcc/config/rs6000/rs6000.md =================================================================== --- gcc/config/rs6000/rs6000.md (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 242456) +++ gcc/config/rs6000/rs6000.md (.../gcc/config/rs6000) (working copy) @@ -13438,7 +13438,8 @@ (define_peephole2 (set (match_operand:SFDF 2 "offsettable_mem_operand" "") (match_operand:SFDF 3 "toc_fusion_or_p9_reg_operand" ""))] "TARGET_P9_FUSION && peep2_reg_dead_p (2, operands[0]) - && fusion_p9_p (operands[0], operands[1], operands[2], operands[3])" + && fusion_p9_p (operands[0], operands[1], operands[2], operands[3]) + && !rtx_equal_p (operands[0], operands[3])" [(const_int 0)] { expand_fusion_p9_store (operands); @@ -13496,7 +13497,7 @@ (define_insn "fusion_gpr_<P:mode>_<GPR_F (unspec:GPR_FUSION [(match_operand:GPR_FUSION 1 "int_reg_operand" "r")] UNSPEC_FUSION_P9)) - (clobber (match_operand:P 2 "base_reg_operand" "=&b"))] + (clobber (match_operand:P 2 "base_reg_operand" "=b"))] "TARGET_P9_FUSION" { return emit_fusion_p9_store (operands[0], operands[1], operands[2]); @@ -13504,8 +13505,8 @@ (define_insn "fusion_gpr_<P:mode>_<GPR_F [(set_attr "type" "store") (set_attr "length" "8")]) -(define_insn "fusion_fpr_<P:mode>_<FPR_FUSION:mode>_load" - [(set (match_operand:FPR_FUSION 0 "fpr_reg_operand" "=d") +(define_insn "fusion_vsx_<P:mode>_<FPR_FUSION:mode>_load" + [(set (match_operand:FPR_FUSION 0 "vsx_register_operand" "=dwb") (unspec:FPR_FUSION [(match_operand:FPR_FUSION 1 "fusion_addis_mem_combo_load" "wF")] UNSPEC_FUSION_P9)) @@ -13517,10 +13518,10 @@ (define_insn "fusion_fpr_<P:mode>_<FPR_F [(set_attr "type" "fpload") (set_attr "length" "8")]) -(define_insn "fusion_fpr_<P:mode>_<FPR_FUSION:mode>_store" +(define_insn "fusion_vsx_<P:mode>_<FPR_FUSION:mode>_store" [(set (match_operand:FPR_FUSION 0 "fusion_addis_mem_combo_store" "=wF") (unspec:FPR_FUSION - [(match_operand:FPR_FUSION 1 "fpr_reg_operand" "d")] + [(match_operand:FPR_FUSION 1 "vsx_register_operand" "dwb")] UNSPEC_FUSION_P9)) (clobber (match_operand:P 2 "base_reg_operand" "=b"))] "TARGET_P9_FUSION" Index: gcc/testsuite/gcc.target/powerpc/fusion4.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/fusion4.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/fusion4.c (.../gcc/testsuite/gcc.target/powerpc) (revision 242499) @@ -0,0 +1,13 @@ +/* { dg-do compile { target { powerpc*-*-* && ilp32 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */ +/* { dg-options "-mcpu=power7 -mtune=power9 -O3 -msoft-float -m32" } */ + +#define LARGE 0x12345 + +float fusion_float_read (float *p){ return p[LARGE]; } + +void fusion_float_write (float *p, float f){ p[LARGE] = f; } + +/* { dg-final { scan-assembler "store fusion, type SF" } } */