Re: [PATCH][AArch64] Improve Cortex-A53 FP scheduler

Richard Earnshaw (lists) Wed, 14 Jun 2017 07:24:06 -0700

On 12/06/17 14:16, Wilco Dijkstra wrote:
> The Cortex-A53 scheduler model of FMAC bypass is not quite right
> for FMAC to FMAC forwarding.  Experiments also show that the latencies of
> FP operations are too high.  Rather than adding more bypasses,
> adjust the latencies of FP instructions to get a better schedule on
> average.  As a result SPECFP2006 is 1.1% faster.
> 
> Passes AArch64 and ARM bootstrap and regress.
> 
> ChangeLog:
> 2017-05-30  Wilco Dijkstra  <wdijk...@arm.com>
> 
>       * config/arm/cortex-a53.md (cortex_a53_fpalu): Adjust latency.
>       (cortex_a53_fconst): Likewise.
>       (cortex_a53_fpmul): Likewise.
>       (cortex_a53_f_load_64): Likewise.
>       (cortex_a53_f_load_many): Likewise.
>       (cortex_a53_advsimd_alu): Likewise.
>       (cortex_a53_advsimd_alu_q): Likewise.
>       (cortex_a53_advsimd_mul): Likewise.
>       (cortex_a53_advsimd_mul_q): Likewise.
>       (fpmac bypass): Add new bypass for fpmac-fpmac case.
>       Add missing fmul, r2f_cvt and fconst cases.


OK.

R.

> --
> diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md
> index 403e84b5d4c3993c06ee879d147f9c5eb6dd3b9a..49f80d3130f97777260e9d3c6d4f37ef3db20c94 100644
> --- a/gcc/config/arm/cortex-a53.md
> +++ b/gcc/config/arm/cortex-a53.md
> @@ -503,19 +503,19 @@ (define_cpu_unit "cortex_a53_crypto"
>  ;; Floating-point arithmetic.
>  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
>  
> -(define_insn_reservation "cortex_a53_fpalu" 5
> +(define_insn_reservation "cortex_a53_fpalu" 4
>    (and (eq_attr "tune" "cortexa53")
>       (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov,
>                       f_cvt, fcmps, fcmpd, fccmps, fccmpd, fcsel,
>                       f_rints, f_rintd, f_minmaxs, f_minmaxd"))
>    "cortex_a53_slot_any,cortex_a53_fp_alu")
>  
> -(define_insn_reservation "cortex_a53_fconst" 3
> +(define_insn_reservation "cortex_a53_fconst" 2
>    (and (eq_attr "tune" "cortexa53")
>         (eq_attr "type" "fconsts,fconstd"))
>    "cortex_a53_slot_any,cortex_a53_fp_alu")
>  
> -(define_insn_reservation "cortex_a53_fpmul" 5
> +(define_insn_reservation "cortex_a53_fpmul" 4
>    (and (eq_attr "tune" "cortexa53")
>         (eq_attr "type" "fmuls,fmuld"))
>    "cortex_a53_slot_any,cortex_a53_fp_mul")
> @@ -566,14 +566,14 @@ (define_insn_reservation "cortex_a53_f_flags" 5
>  ;; Floating-point load/store.
>  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
>  
> -(define_insn_reservation "cortex_a53_f_load_64" 4
> +(define_insn_reservation "cortex_a53_f_load_64" 3
>    (and (eq_attr "tune" "cortexa53")
>         (ior (eq_attr "type" "f_loads,f_loadd")
>           (eq_attr "cortex_a53_advsimd_type"
>                    "advsimd_load_64")))
>    "cortex_a53_slot_any+cortex_a53_ls_agen")
>  
> -(define_insn_reservation "cortex_a53_f_load_many" 5
> +(define_insn_reservation "cortex_a53_f_load_many" 4
>    (and (eq_attr "tune" "cortexa53")
>         (eq_attr "cortex_a53_advsimd_type"
>               "advsimd_load_128,advsimd_load_lots"))
> @@ -604,22 +604,22 @@ (define_insn_reservation "cortex_a53_f_store_many" 0
>  ;; or a 128-bit operation in which case we require in our model that we
>  ;; issue from slot 0.
>  
> -(define_insn_reservation "cortex_a53_advsimd_alu" 5
> +(define_insn_reservation "cortex_a53_advsimd_alu" 4
>    (and (eq_attr "tune" "cortexa53")
>         (eq_attr "cortex_a53_advsimd_type" "advsimd_alu"))
>    "cortex_a53_slot_any,cortex_a53_fp_alu")
>  
> -(define_insn_reservation "cortex_a53_advsimd_alu_q" 5
> +(define_insn_reservation "cortex_a53_advsimd_alu_q" 4
>    (and (eq_attr "tune" "cortexa53")
>         (eq_attr "cortex_a53_advsimd_type" "advsimd_alu_q"))
>    "cortex_a53_slot0,cortex_a53_fp_alu_q")
>  
> -(define_insn_reservation "cortex_a53_advsimd_mul" 5
> +(define_insn_reservation "cortex_a53_advsimd_mul" 4
>    (and (eq_attr "tune" "cortexa53")
>         (eq_attr "cortex_a53_advsimd_type" "advsimd_mul"))
>    "cortex_a53_slot_any,cortex_a53_fp_mul")
>  
> -(define_insn_reservation "cortex_a53_advsimd_mul_q" 5
> +(define_insn_reservation "cortex_a53_advsimd_mul_q" 4
>    (and (eq_attr "tune" "cortexa53")
>         (eq_attr "cortex_a53_advsimd_type" "advsimd_mul_q"))
>    "cortex_a53_slot0,cortex_a53_fp_mul_q")
> @@ -698,20 +698,18 @@ (define_insn_reservation "cortex_a53_crypto_sha_slow" 5
>  ;; multiply-accumulate operations as a bypass reducing the latency
>  ;; of producing instructions to near zero.
>  
> -(define_bypass 1 "cortex_a53_fp*,
> +(define_bypass 1 "cortex_a53_fpalu,
> +               cortex_a53_fpmul,
>                 cortex_a53_r2f,
> +               cortex_a53_r2f_cvt,
> +               cortex_a53_fconst,
>                 cortex_a53_f_load*"
>                "cortex_a53_fpmac"
>                "aarch_accumulator_forwarding")
>  
> -;; Model a bypass from the result of an FP operation to a use.
> -
> -(define_bypass 4 "cortex_a53_fpalu,
> -               cortex_a53_fpmul"
> -              "cortex_a53_fpalu,
> -               cortex_a53_fpmul,
> -               cortex_a53_fpmac,
> -               cortex_a53_advsimd_div*")
> +(define_bypass 4 "cortex_a53_fpmac"
> +              "cortex_a53_fpmac"
> +              "aarch_accumulator_forwarding")
>  
>  ;; We want AESE and AESMC to end up consecutive to one another.
>  
>

Re: [PATCH][AArch64] Improve Cortex-A53 FP scheduler

Reply via email to