Ping.

Aaron Sawdey, Ph.D. saw...@linux.ibm.com
IBM Linux on POWER Toolchain
 

> On Oct 26, 2020, at 4:44 PM, acsaw...@linux.ibm.com wrote:
> 
> From: Aaron Sawdey <acsaw...@linux.ibm.com>
> 
> This patch adds the first couple patterns to support p10 fusion. These
> will allow combine to create a single insn for a pair of instructions
> that power10 can fuse and execute. These particular ones have the
> requirement that only cr0 can be used when fusing a load with a compare
> immediate of -1/0/1, so we want combine to put that requirement in, and
> if it doesn't work out later the splitter can get used.
> 
> This also adds option -mpower10-fusion which defaults on for power10 and
> will gate all these fusion patterns. In addition I have added an
> undocumented option -mpower10-fusion-ld-cmpi (which may be removed later)
> that just controls the load+compare-immediate patterns. I have made
> these default on for power10 but they are not disallowed for earlier
> processors because it is still valid code. This allows us to test the
> correctness of fusion code generation by turning it on explicitly.
> 
> The intention is to work through more patterns of this style to support
> the rest of the power10 fusion pairs.
> 
> Bootstrap and regtest look good on ppc64le power9 with these patterns
> enabled in stage2/stage3 and for regtest. Ok for trunk?
> 
> gcc/ChangeLog:
> 
>       * config/rs6000/predicates.md: Add const_m1_to_1_operand.
>       * config/rs6000/rs6000-cpus.def: Add OPTION_MASK_P10_FUSION and
>       OPTION_MASK_P10_FUSION_LD_CMPI to ISA_3_1_MASKS_SERVER.
>       * config/rs6000/rs6000-protos.h (address_ok_for_form): Add
>       prototype.
>       * config/rs6000/rs6000.c (rs6000_option_override_internal):
>       automatically set -mpower10-fusion and -mpower10-fusion-ld-cmpi
>       if target is power10.  (rs6000_opt_masks): Allow -mpower10-fusion
>       in function attributes.  (address_ok_for_form): New function.
>       * config/rs6000/rs6000.h: Add MASK_P10_FUSION.
>       * config/rs6000/rs6000.md (*ld_cmpi_cr0): New
>       define_insn_and_split.
>       (*lwa_cmpdi_cr0): New define_insn_and_split.
>       (*lwa_cmpwi_cr0): New define_insn_and_split.
>       * config/rs6000/rs6000.opt: Add -mpower10-fusion
>       and -mpower10-fusion-ld-cmpi.
> ---
> gcc/config/rs6000/predicates.md   |  5 +++
> gcc/config/rs6000/rs6000-cpus.def |  6 ++-
> gcc/config/rs6000/rs6000-protos.h |  2 +
> gcc/config/rs6000/rs6000.c        | 34 ++++++++++++++++
> gcc/config/rs6000/rs6000.h        |  1 +
> gcc/config/rs6000/rs6000.md       | 68 +++++++++++++++++++++++++++++++
> gcc/config/rs6000/rs6000.opt      |  8 ++++
> 7 files changed, 123 insertions(+), 1 deletion(-)
> 
> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index 4c2fe7fa312..b75c1ddfb69 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -297,6 +297,11 @@ (define_predicate "const_0_to_1_operand"
>   (and (match_code "const_int")
>        (match_test "IN_RANGE (INTVAL (op), 0, 1)")))
> 
> +;; Match op = -1, op = 0, or op = 1.
> +(define_predicate "const_m1_to_1_operand"
> +  (and (match_code "const_int")
> +       (match_test "IN_RANGE (INTVAL (op), -1, 1)")))
> +
> ;; Match op = 0..3.
> (define_predicate "const_0_to_3_operand"
>   (and (match_code "const_int")
> diff --git a/gcc/config/rs6000/rs6000-cpus.def 
> b/gcc/config/rs6000/rs6000-cpus.def
> index 8d2c1ffd6cf..3e65289d8df 100644
> --- a/gcc/config/rs6000/rs6000-cpus.def
> +++ b/gcc/config/rs6000/rs6000-cpus.def
> @@ -82,7 +82,9 @@
> 
> #define ISA_3_1_MASKS_SERVER  (ISA_3_0_MASKS_SERVER                   \
>                                | OPTION_MASK_POWER10                  \
> -                              | OTHER_POWER10_MASKS)
> +                              | OTHER_POWER10_MASKS                  \
> +                              | OPTION_MASK_P10_FUSION               \
> +                              | OPTION_MASK_P10_FUSION_LD_CMPI)
> 
> /* Flags that need to be turned off if -mno-power9-vector.  */
> #define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW                \
> @@ -129,6 +131,8 @@
>                                | OPTION_MASK_FLOAT128_KEYWORD         \
>                                | OPTION_MASK_FPRND                    \
>                                | OPTION_MASK_POWER10                  \
> +                              | OPTION_MASK_P10_FUSION               \
> +                              | OPTION_MASK_P10_FUSION_LD_CMPI       \
>                                | OPTION_MASK_HTM                      \
>                                | OPTION_MASK_ISEL                     \
>                                | OPTION_MASK_MFCRF                    \
> diff --git a/gcc/config/rs6000/rs6000-protos.h 
> b/gcc/config/rs6000/rs6000-protos.h
> index 25fa5dd57cd..d8a344245e6 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -190,6 +190,8 @@ enum non_prefixed_form {
> 
> extern enum insn_form address_to_insn_form (rtx, machine_mode,
>                                           enum non_prefixed_form);
> +extern bool address_ok_for_form (rtx, machine_mode,
> +                              enum non_prefixed_form);
> extern bool prefixed_load_p (rtx_insn *);
> extern bool prefixed_store_p (rtx_insn *);
> extern bool prefixed_paddi_p (rtx_insn *);
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 4d528a39a37..b8de318a0bc 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -4406,6 +4406,12 @@ rs6000_option_override_internal (bool global_init_p)
>   if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
>     rs6000_isa_flags |= OPTION_MASK_MMA;
> 
> +  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) 
> == 0)
> +    rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
> +
> +  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & 
> OPTION_MASK_P10_FUSION_LD_CMPI) == 0)
> +    rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI;
> +
>   /* Turn off vector pair/mma options on non-power10 systems.  */
>   else if (!TARGET_POWER10 && TARGET_MMA)
>     {
> @@ -23391,6 +23397,7 @@ static struct rs6000_opt_mask const 
> rs6000_opt_masks[] =
>   { "power9-minmax",          OPTION_MASK_P9_MINMAX,          false, true  },
>   { "power9-misc",            OPTION_MASK_P9_MISC,            false, true  },
>   { "power9-vector",          OPTION_MASK_P9_VECTOR,          false, true  },
> +  { "power10-fusion",                OPTION_MASK_P10_FUSION,         false, 
> true  },
>   { "powerpc-gfxopt",         OPTION_MASK_PPC_GFXOPT,         false, true  },
>   { "powerpc-gpopt",          OPTION_MASK_PPC_GPOPT,          false, true  },
>   { "prefixed",                       OPTION_MASK_PREFIXED,           false, 
> true  },
> @@ -25482,6 +25489,33 @@ address_to_insn_form (rtx addr,
>   return INSN_FORM_BAD;
> }
> 
> +bool
> +address_ok_for_form (rtx addr,
> +                  machine_mode mode,
> +                  enum non_prefixed_form non_prefixed_format)
> +{
> +  enum insn_form result_form;
> +
> +  result_form = address_to_insn_form (addr, mode, non_prefixed_format);
> +
> +  switch (non_prefixed_format)
> +    {
> +    case NON_PREFIXED_DS:
> +      switch (result_form)
> +     {
> +     case INSN_FORM_DS:
> +     case INSN_FORM_BASE_REG:
> +       return true;
> +     default:
> +       break;
> +     }
> +      break;
> +    default:
> +      break;
> +    }
> +  return false;
> +}
> +
> /* Helper function to see if we're potentially looking at lfs/stfs.
>    - PARALLEL containing a SET and a CLOBBER
>    - stfs:
> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> index bbd8060e143..884452fc6d9 100644
> --- a/gcc/config/rs6000/rs6000.h
> +++ b/gcc/config/rs6000/rs6000.h
> @@ -539,6 +539,7 @@ extern int rs6000_vector_align[];
> #define MASK_UPDATE                   OPTION_MASK_UPDATE
> #define MASK_VSX                      OPTION_MASK_VSX
> #define MASK_POWER10                  OPTION_MASK_POWER10
> +#define MASK_P10_FUSION                      OPTION_MASK_P10_FUSION
> 
> #ifndef IN_LIBGCC2
> #define MASK_POWERPC64                        OPTION_MASK_POWERPC64
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index dc060143104..bbcc6abe0f9 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -1896,6 +1896,74 @@ (define_insn_and_split "*add<mode>3_imm_dot2"
>    (set_attr "dot" "yes")
>    (set_attr "length" "4,8")])
> 
> +;; Define an insn for ld+cmpi so we can force it to use CR0 on p10
> +;; immediate has to be -1/0/1
> +(define_insn_and_split "*ld_cmpi_cr0"
> +  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
> +     (compare:CC (match_operand:DI 1 "memory_operand" "m")
> +                 (match_operand:GPR 3 "const_m1_to_1_operand" "n")))
> +   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
> +     (match_dup 1))
> +   ]
> +  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
> +  "ld %0,%1\;cmpi 0,1,%0,%3"
> +  "&& reload_completed
> +   && (cc_reg_not_cr0_operand (operands[2], CCmode)
> +       || !address_ok_for_form (XEXP (operands[1],0), DImode, 
> NON_PREFIXED_DS))"
> +  [(set (match_dup 0) (match_dup 1))
> +   (set (match_dup 2)
> +        (compare:CC (match_dup 0)
> +                 (match_dup 3)))]
> +  ""
> +  [(set_attr "type" "load")
> +   (set_attr "length" "8")])
> +
> +;; Define an insn for lwa+cmpdi so we can force it to use CR0 on p10
> +;; immediate is -1/0/1
> +(define_insn_and_split "*lwa_cmpdi_cr0"
> +  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
> +     (compare:CC (sign_extend:DI (match_operand:SI 1 "memory_operand" "m"))
> +                 (match_operand:GPR 3 "const_m1_to_1_operand" "n")))
> +   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
> +     (sign_extend:DI (match_dup 1)))
> +   ]
> +  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
> +  "lwa %0,%1\;cmpdi %0,%3"
> +  "&& reload_completed
> +   && (cc_reg_not_cr0_operand (operands[2], CCmode)
> +       || !address_ok_for_form (XEXP (operands[1],0), DImode, 
> NON_PREFIXED_DS))"
> +  [(set (match_dup 0)
> +     (sign_extend:DI (match_dup 1)))
> +   (set (match_dup 2)
> +        (compare:CC (match_dup 0)
> +                 (match_dup 3)))]
> +  ""
> +  [(set_attr "type" "load")
> +   (set_attr "length" "8")])
> +
> +;; Define an insn for lwa+cmpwi so we can force it to use CR0 on p10
> +;; immediate is -1/0/1
> +(define_insn_and_split "*lwa_cmpwi_cr0"
> +  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
> +     (compare:CC (match_operand:SI 1 "memory_operand" "m")
> +                 (match_operand:SI 3 "const_m1_to_1_operand" "n")))
> +   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
> +     (sign_extend:DI (match_dup 1)))
> +   ]
> +  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
> +  "lwa %0,%1\;cmpwi %0,%3"
> +  "&& reload_completed
> +   && (cc_reg_not_cr0_operand (operands[2], CCmode)
> +       || !address_ok_for_form (XEXP (operands[1],0), DImode, 
> NON_PREFIXED_DS))"
> +  [(set (match_dup 0)
> +     (sign_extend:DI (match_dup 1)))
> +   (set (match_dup 2)
> +        (compare:CC (match_dup 0)
> +                 (match_dup 3)))]
> +  ""
> +  [(set_attr "type" "load")
> +   (set_attr "length" "8")])
> +
> ;; Split an add that we can't do in one insn into two insns, each of which
> ;; does one 16-bit part.  This is used by combine.  Note that the low-order
> ;; add should be last in case the result gets used in an address.
> diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
> index b2a70e88ca8..63457efb607 100644
> --- a/gcc/config/rs6000/rs6000.opt
> +++ b/gcc/config/rs6000/rs6000.opt
> @@ -479,6 +479,14 @@ mpower8-vector
> Target Report Mask(P8_VECTOR) Var(rs6000_isa_flags)
> Use vector and scalar instructions added in ISA 2.07.
> 
> +mpower10-fusion
> +Target Report Mask(P10_FUSION) Var(rs6000_isa_flags)
> +Fuse certain integer operations together for better performance on power10.
> +
> +mpower10-fusion-ld-cmpi
> +Target Undocumented Mask(P10_FUSION_LD_CMPI) Var(rs6000_isa_flags)
> +Fuse certain integer operations together for better performance on power10.
> +
> mcrypto
> Target Report Mask(CRYPTO) Var(rs6000_isa_flags)
> Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.
> -- 
> 2.18.4
> 

Reply via email to