Ping. Aaron Sawdey, Ph.D. saw...@linux.ibm.com IBM Linux on POWER Toolchain
> On Oct 26, 2020, at 4:44 PM, acsaw...@linux.ibm.com wrote: > > From: Aaron Sawdey <acsaw...@linux.ibm.com> > > This patch adds the first couple of patterns to support p10 fusion. These > will allow combine to create a single insn for a pair of instructions > that power10 can fuse and execute. These particular ones have the > requirement that only cr0 can be used when fusing a load with a compare > immediate of -1/0/1, so we want combine to put that requirement in, and > if it doesn't work out later the splitter can get used. > > This also adds option -mpower10-fusion which defaults on for power10 and > will gate all these fusion patterns. In addition I have added an > undocumented option -mpower10-fusion-ld-cmpi (which may be removed later) > that just controls the load+compare-immediate patterns. I have made > these default on for power10 but they are not disallowed for earlier > processors because it is still valid code. This allows us to test the > correctness of fusion code generation by turning it on explicitly. > > The intention is to work through more patterns of this style to support > the rest of the power10 fusion pairs. > > Bootstrap and regtest look good on ppc64le power9 with these patterns > enabled in stage2/stage3 and for regtest. Ok for trunk? > > gcc/ChangeLog: > > * config/rs6000/predicates.md: Add const_m1_to_1_operand. > * config/rs6000/rs6000-cpus.def: Add OPTION_MASK_P10_FUSION and > OPTION_MASK_P10_FUSION_LD_CMPI to ISA_3_1_MASKS_SERVER. > * config/rs6000/rs6000-protos.h (address_ok_for_form): Add > prototype. > * config/rs6000/rs6000.c (rs6000_option_override_internal): > automatically set -mpower10-fusion and -mpower10-fusion-ld-cmpi > if target is power10. (rs6000_opt_masks): Allow -mpower10-fusion > in function attributes. (address_ok_for_form): New function. > * config/rs6000/rs6000.h: Add MASK_P10_FUSION. > * config/rs6000/rs6000.md (*ld_cmpi_cr0): New > define_insn_and_split. > (*lwa_cmpdi_cr0): New define_insn_and_split. 
> (*lwa_cmpwi_cr0): New define_insn_and_split. > * config/rs6000/rs6000.opt: Add -mpower10-fusion > and -mpower10-fusion-ld-cmpi. > --- > gcc/config/rs6000/predicates.md | 5 +++ > gcc/config/rs6000/rs6000-cpus.def | 6 ++- > gcc/config/rs6000/rs6000-protos.h | 2 + > gcc/config/rs6000/rs6000.c | 34 ++++++++++++++++ > gcc/config/rs6000/rs6000.h | 1 + > gcc/config/rs6000/rs6000.md | 68 +++++++++++++++++++++++++++++++ > gcc/config/rs6000/rs6000.opt | 8 ++++ > 7 files changed, 123 insertions(+), 1 deletion(-) > > diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md > index 4c2fe7fa312..b75c1ddfb69 100644 > --- a/gcc/config/rs6000/predicates.md > +++ b/gcc/config/rs6000/predicates.md > @@ -297,6 +297,11 @@ (define_predicate "const_0_to_1_operand" > (and (match_code "const_int") > (match_test "IN_RANGE (INTVAL (op), 0, 1)"))) > > +;; Match op = -1, op = 0, or op = 1. > +(define_predicate "const_m1_to_1_operand" > + (and (match_code "const_int") > + (match_test "IN_RANGE (INTVAL (op), -1, 1)"))) > + > ;; Match op = 0..3. > (define_predicate "const_0_to_3_operand" > (and (match_code "const_int") > diff --git a/gcc/config/rs6000/rs6000-cpus.def > b/gcc/config/rs6000/rs6000-cpus.def > index 8d2c1ffd6cf..3e65289d8df 100644 > --- a/gcc/config/rs6000/rs6000-cpus.def > +++ b/gcc/config/rs6000/rs6000-cpus.def > @@ -82,7 +82,9 @@ > > #define ISA_3_1_MASKS_SERVER (ISA_3_0_MASKS_SERVER \ > | OPTION_MASK_POWER10 \ > - | OTHER_POWER10_MASKS) > + | OTHER_POWER10_MASKS \ > + | OPTION_MASK_P10_FUSION \ > + | OPTION_MASK_P10_FUSION_LD_CMPI) > > /* Flags that need to be turned off if -mno-power9-vector. 
*/ > #define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW \ > @@ -129,6 +131,8 @@ > | OPTION_MASK_FLOAT128_KEYWORD \ > | OPTION_MASK_FPRND \ > | OPTION_MASK_POWER10 \ > + | OPTION_MASK_P10_FUSION \ > + | OPTION_MASK_P10_FUSION_LD_CMPI \ > | OPTION_MASK_HTM \ > | OPTION_MASK_ISEL \ > | OPTION_MASK_MFCRF \ > diff --git a/gcc/config/rs6000/rs6000-protos.h > b/gcc/config/rs6000/rs6000-protos.h > index 25fa5dd57cd..d8a344245e6 100644 > --- a/gcc/config/rs6000/rs6000-protos.h > +++ b/gcc/config/rs6000/rs6000-protos.h > @@ -190,6 +190,8 @@ enum non_prefixed_form { > > extern enum insn_form address_to_insn_form (rtx, machine_mode, > enum non_prefixed_form); > +extern bool address_ok_for_form (rtx, machine_mode, > + enum non_prefixed_form); > extern bool prefixed_load_p (rtx_insn *); > extern bool prefixed_store_p (rtx_insn *); > extern bool prefixed_paddi_p (rtx_insn *); > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index 4d528a39a37..b8de318a0bc 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -4406,6 +4406,12 @@ rs6000_option_override_internal (bool global_init_p) > if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0) > rs6000_isa_flags |= OPTION_MASK_MMA; > > + if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) > == 0) > + rs6000_isa_flags |= OPTION_MASK_P10_FUSION; > + > + if (TARGET_POWER10 && (rs6000_isa_flags_explicit & > OPTION_MASK_P10_FUSION_LD_CMPI) == 0) > + rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI; > + > /* Turn off vector pair/mma options on non-power10 systems. 
*/ > else if (!TARGET_POWER10 && TARGET_MMA) > { > @@ -23391,6 +23397,7 @@ static struct rs6000_opt_mask const > rs6000_opt_masks[] = > { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true }, > { "power9-misc", OPTION_MASK_P9_MISC, false, true }, > { "power9-vector", OPTION_MASK_P9_VECTOR, false, true }, > + { "power10-fusion", OPTION_MASK_P10_FUSION, false, > true }, > { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true }, > { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true }, > { "prefixed", OPTION_MASK_PREFIXED, false, > true }, > @@ -25482,6 +25489,33 @@ address_to_insn_form (rtx addr, > return INSN_FORM_BAD; > } > > +bool > +address_ok_for_form (rtx addr, > + machine_mode mode, > + enum non_prefixed_form non_prefixed_format) > +{ > + enum insn_form result_form; > + > + result_form = address_to_insn_form (addr, mode, non_prefixed_format); > + > + switch (non_prefixed_format) > + { > + case NON_PREFIXED_DS: > + switch (result_form) > + { > + case INSN_FORM_DS: > + case INSN_FORM_BASE_REG: > + return true; > + default: > + break; > + } > + break; > + default: > + break; > + } > + return false; > +} > + > /* Helper function to see if we're potentially looking at lfs/stfs. 
> - PARALLEL containing a SET and a CLOBBER > - stfs: > diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h > index bbd8060e143..884452fc6d9 100644 > --- a/gcc/config/rs6000/rs6000.h > +++ b/gcc/config/rs6000/rs6000.h > @@ -539,6 +539,7 @@ extern int rs6000_vector_align[]; > #define MASK_UPDATE OPTION_MASK_UPDATE > #define MASK_VSX OPTION_MASK_VSX > #define MASK_POWER10 OPTION_MASK_POWER10 > +#define MASK_P10_FUSION OPTION_MASK_P10_FUSION > > #ifndef IN_LIBGCC2 > #define MASK_POWERPC64 OPTION_MASK_POWERPC64 > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index dc060143104..bbcc6abe0f9 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -1896,6 +1896,74 @@ (define_insn_and_split "*add<mode>3_imm_dot2" > (set_attr "dot" "yes") > (set_attr "length" "4,8")]) > > +;; Define an insn for ld+cmpi so we can force it to use CR0 on p10 > +;; immediate has to be -1/0/1 > +(define_insn_and_split "*ld_cmpi_cr0" > + [(set (match_operand:CC 2 "cc_reg_operand" "=x") > + (compare:CC (match_operand:DI 1 "memory_operand" "m") > + (match_operand:GPR 3 "const_m1_to_1_operand" "n"))) > + (set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (match_dup 1)) > + ] > + "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" > + "ld %0,%1\;cmpi 0,1,%0,%3" > + "&& reload_completed > + && (cc_reg_not_cr0_operand (operands[2], CCmode) > + || !address_ok_for_form (XEXP (operands[1],0), DImode, > NON_PREFIXED_DS))" > + [(set (match_dup 0) (match_dup 1)) > + (set (match_dup 2) > + (compare:CC (match_dup 0) > + (match_dup 3)))] > + "" > + [(set_attr "type" "load") > + (set_attr "length" "8")]) > + > +;; Define an insn for lwa+cmpdi so we can force it to use CR0 on p10 > +;; immediate is -1/0/1 > +(define_insn_and_split "*lwa_cmpdi_cr0" > + [(set (match_operand:CC 2 "cc_reg_operand" "=x") > + (compare:CC (sign_extend:DI (match_operand:SI 1 "memory_operand" "m")) > + (match_operand:GPR 3 "const_m1_to_1_operand" "n"))) > + (set 
(match_operand:DI 0 "gpc_reg_operand" "=r") > + (sign_extend:DI (match_dup 1))) > + ] > + "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" > + "lwa %0,%1\;cmpdi %0,%3" > + "&& reload_completed > + && (cc_reg_not_cr0_operand (operands[2], CCmode) > + || !address_ok_for_form (XEXP (operands[1],0), DImode, > NON_PREFIXED_DS))" > + [(set (match_dup 0) > + (sign_extend:DI (match_dup 1))) > + (set (match_dup 2) > + (compare:CC (match_dup 0) > + (match_dup 3)))] > + "" > + [(set_attr "type" "load") > + (set_attr "length" "8")]) > + > +;; Define an insn for lwa+cmpwi so we can force it to use CR0 on p10 > +;; immediate is -1/0/1 > +(define_insn_and_split "*lwa_cmpwi_cr0" > + [(set (match_operand:CC 2 "cc_reg_operand" "=x") > + (compare:CC (match_operand:SI 1 "memory_operand" "m") > + (match_operand:SI 3 "const_m1_to_1_operand" "n"))) > + (set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (sign_extend:DI (match_dup 1))) > + ] > + "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" > + "lwa %0,%1\;cmpwi %0,%3" > + "&& reload_completed > + && (cc_reg_not_cr0_operand (operands[2], CCmode) > + || !address_ok_for_form (XEXP (operands[1],0), DImode, > NON_PREFIXED_DS))" > + [(set (match_dup 0) > + (sign_extend:DI (match_dup 1))) > + (set (match_dup 2) > + (compare:CC (match_dup 0) > + (match_dup 3)))] > + "" > + [(set_attr "type" "load") > + (set_attr "length" "8")]) > + > ;; Split an add that we can't do in one insn into two insns, each of which > ;; does one 16-bit part. This is used by combine. Note that the low-order > ;; add should be last in case the result gets used in an address. > diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt > index b2a70e88ca8..63457efb607 100644 > --- a/gcc/config/rs6000/rs6000.opt > +++ b/gcc/config/rs6000/rs6000.opt > @@ -479,6 +479,14 @@ mpower8-vector > Target Report Mask(P8_VECTOR) Var(rs6000_isa_flags) > Use vector and scalar instructions added in ISA 2.07. 
> > +mpower10-fusion > +Target Report Mask(P10_FUSION) Var(rs6000_isa_flags) > +Fuse certain integer operations together for better performance on power10. > + > +mpower10-fusion-ld-cmpi > +Target Undocumented Mask(P10_FUSION_LD_CMPI) Var(rs6000_isa_flags) > +Fuse certain integer operations together for better performance on power10. > + > mcrypto > Target Report Mask(CRYPTO) Var(rs6000_isa_flags) > Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions. > -- > 2.18.4 >