RE: [PATCH] aarch64: Add the cost model for Neoverse N1

Tamar Christina via Gcc-patches Mon, 24 Apr 2023 10:38:12 -0700

Hi Evandro,

I wanted to give this patch a try, but the diff seems corrupt: the whitespace
at the start of the context lines seems to have gone missing.


Could you try resending it?

Thanks,
Tamar

> -----Original Message-----
> From: Gcc-patches <gcc-patches-
> bounces+tamar.christina=arm....@gcc.gnu.org> On Behalf Of Evandro
> Menezes via Gcc-patches
> Sent: Tuesday, April 18, 2023 10:42 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Evandro Menezes <ebah...@icloud.com>; Richard Sandiford
> <richard.sandif...@arm.com>; Kyrylo Tkachov <kyrylo.tkac...@arm.com>
> Subject: [PATCH] aarch64: Add the cost model for Neoverse N1
> 
> This patch adds the cost model for Neoverse N1, based on the information
> from the "Arm Neoverse N1 Software Optimization Guide".
> 
> --
> Evandro Menezes
> 
> ===================================================================
> =============
> 
> gcc/ChangeLog:
> 
>        * config/aarch64/aarch64-cores.def: Use the Neoverse N1 cost model.
>        * config/aarch64/aarch64.cc
>        (cortexa76_tunings): Rename variable.
>        (neoversen1_addrcost_table): New variable.
>        (neoversen1_vector_cost): Likewise.
>        (neoversen1_regmove_cost): Likewise.
>        (neoversen1_advsimd_vector_cost): Likewise.
>        (neoversen1_scalar_issue_info): Likewise.
>        (neoversen1_advsimd_issue_info): Likewise.
>        (neoversen1_vec_issue_info): Likewise.
>        (neoversen1_vector_cost): Likewise.
>        (neoversen1_tunings): Likewise.
>        * config/arm/aarch-cost-tables.h
>        (neoversen1_extra_costs): New variable.
> 
> Signed-off-by: Evandro Menezes <evan...@gcc.gnu.org>
> ---
> gcc/config/aarch64/aarch64-cores.def |  20 ++--
> gcc/config/aarch64/aarch64.cc        | 155 ++++++++++++++++++++++++---
> gcc/config/arm/aarch-cost-tables.h   | 107 ++++++++++++++++++
> 3 files changed, 259 insertions(+), 23 deletions(-)
> 
> diff --git a/gcc/config/aarch64/aarch64-cores.def
> b/gcc/config/aarch64/aarch64-cores.def
> index 2ec88c98400..e352e4077b1 100644
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -105,17 +105,17 @@ AARCH64_CORE("thunderx2t99",  thunderx2t99,
> thunderx2t99, V8_1A,  (CRYPTO), thu
> /* ARM ('A') cores. */
> AARCH64_CORE("cortex-a55",  cortexa55, cortexa53, V8_2A,  (F16, RCPC,
> DOTPROD), cortexa53, 0x41, 0xd05, -1) AARCH64_CORE("cortex-a75",
> cortexa75, cortexa57, V8_2A,  (F16, RCPC, DOTPROD), cortexa73, 0x41,
> 0xd0a, -1) -AARCH64_CORE("cortex-a76",  cortexa76, cortexa57, V8_2A,
> (F16, RCPC, DOTPROD), neoversen1, 0x41, 0xd0b, -1) -
> AARCH64_CORE("cortex-a76ae",  cortexa76ae, cortexa57, V8_2A,  (F16,
> RCPC, DOTPROD, SSBS), neoversen1, 0x41, 0xd0e, -1) -
> AARCH64_CORE("cortex-a77",  cortexa77, cortexa57, V8_2A,  (F16, RCPC,
> DOTPROD, SSBS), neoversen1, 0x41, 0xd0d, -1) -AARCH64_CORE("cortex-
> a78",  cortexa78, cortexa57, V8_2A,  (F16, RCPC, DOTPROD, SSBS, PROFILE),
> neoversen1, 0x41, 0xd41, -1) -AARCH64_CORE("cortex-a78ae",  cortexa78ae,
> cortexa57, V8_2A,  (F16, RCPC, DOTPROD, SSBS, PROFILE), neoversen1, 0x41,
> 0xd42, -1) -AARCH64_CORE("cortex-a78c",  cortexa78c, cortexa57, V8_2A,
> (F16, RCPC, DOTPROD, SSBS, PROFILE, FLAGM, PAUTH), neoversen1, 0x41,
> 0xd4b, -1)
> +AARCH64_CORE("cortex-a76",  cortexa76, cortexa57, V8_2A,  (F16, RCPC,
> +DOTPROD), cortexa76, 0x41, 0xd0b, -1) AARCH64_CORE("cortex-a76ae",
> +cortexa76ae, cortexa57, V8_2A,  (F16, RCPC, DOTPROD, SSBS), cortexa76,
> +0x41, 0xd0e, -1) AARCH64_CORE("cortex-a77",  cortexa77, cortexa57,
> +V8_2A,  (F16, RCPC, DOTPROD, SSBS), cortexa76, 0x41, 0xd0d, -1)
> +AARCH64_CORE("cortex-a78",  cortexa78, cortexa57, V8_2A,  (F16, RCPC,
> +DOTPROD, SSBS, PROFILE), cortexa76, 0x41, 0xd41, -1)
> +AARCH64_CORE("cortex-a78ae",  cortexa78ae, cortexa57, V8_2A,  (F16,
> +RCPC, DOTPROD, SSBS, PROFILE), cortexa76, 0x41, 0xd42, -1)
> +AARCH64_CORE("cortex-a78c",  cortexa78c, cortexa57, V8_2A,  (F16, RCPC,
> +DOTPROD, SSBS, PROFILE, FLAGM, PAUTH), cortexa76, 0x41, 0xd4b, -1)
> AARCH64_CORE("cortex-a65",  cortexa65, cortexa53, V8_2A,  (F16, RCPC,
> DOTPROD, SSBS), cortexa73, 0x41, 0xd06, -1) AARCH64_CORE("cortex-
> a65ae",  cortexa65ae, cortexa53, V8_2A,  (F16, RCPC, DOTPROD, SSBS),
> cortexa73, 0x41, 0xd43, -1) -AARCH64_CORE("cortex-x1",  cortexx1,
> cortexa57, V8_2A,  (F16, RCPC, DOTPROD, SSBS, PROFILE), neoversen1, 0x41,
> 0xd44, -1) -AARCH64_CORE("cortex-x1c",  cortexx1c, cortexa57, V8_2A,
> (F16, RCPC, DOTPROD, SSBS, PROFILE, PAUTH), neoversen1, 0x41, 0xd4c, -1)
> -AARCH64_CORE("ares",  ares, cortexa57, V8_2A,  (F16, RCPC, DOTPROD,
> PROFILE), neoversen1, 0x41, 0xd0c, -1)
> +AARCH64_CORE("cortex-x1",  cortexx1, cortexa57, V8_2A,  (F16, RCPC,
> +DOTPROD, SSBS, PROFILE), cortexa76, 0x41, 0xd44, -1)
> +AARCH64_CORE("cortex-x1c",  cortexx1c, cortexa57, V8_2A,  (F16, RCPC,
> +DOTPROD, SSBS, PROFILE, PAUTH), cortexa76, 0x41, 0xd4c, -1)
> +AARCH64_CORE("ares",  ares, cortexa57, V8_2A,  (F16, RCPC, DOTPROD,
> +PROFILE), cortexa76, 0x41, 0xd0c, -1)
> AARCH64_CORE("neoverse-n1",  neoversen1, cortexa57, V8_2A,  (F16, RCPC,
> DOTPROD, PROFILE), neoversen1, 0x41, 0xd0c, -1)
> AARCH64_CORE("neoverse-e1",  neoversee1, cortexa53, V8_2A,  (F16, RCPC,
> DOTPROD, SSBS), cortexa73, 0x41, 0xd4a, -1)
> 
> @@ -160,7 +160,7 @@ AARCH64_CORE("cortex-a73.cortex-a53",
> cortexa73cortexa53, cortexa53, V8A,  (CRC
> /* ARM DynamIQ big.LITTLE configurations.  */
> 
> AARCH64_CORE("cortex-a75.cortex-a55",  cortexa75cortexa55, cortexa53,
> V8_2A,  (F16, RCPC, DOTPROD), cortexa73, 0x41, AARCH64_BIG_LITTLE
> (0xd0a, 0xd05), -1) -AARCH64_CORE("cortex-a76.cortex-a55",
> cortexa76cortexa55, cortexa53, V8_2A,  (F16, RCPC, DOTPROD), neoversen1,
> 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1)
> +AARCH64_CORE("cortex-a76.cortex-a55",  cortexa76cortexa55, cortexa53,
> +V8_2A,  (F16, RCPC, DOTPROD), cortexa76, 0x41, AARCH64_BIG_LITTLE
> +(0xd0b, 0xd05), -1)
> 
> /* Armv8-R Architecture Processors.  */
> AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, (), cortexa53, 0x41,
> 0xd15, -1) diff --git a/gcc/config/aarch64/aarch64.cc
> b/gcc/config/aarch64/aarch64.cc index 42617ced73a..46710490a39
> 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -1867,7 +1867,7 @@ static const struct tune_params
> thunderx3t110_tunings =
>   &thunderx3t110_prefetch_tune
> };
> 
> -static const struct tune_params neoversen1_tunings =
> +static const struct tune_params cortexa76_tunings =
> {
>   &cortexa76_extra_costs,
>   &generic_addrcost_table,
> @@ -1885,18 +1885,18 @@ static const struct tune_params
> neoversen1_tunings =
>   }, /* memmov_cost.  */
>   3, /* issue_rate  */
>   (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /*
> fusible_ops  */
> -  "32:16", /* function_align.  */
> -  "4", /* jump_align.  */
> -  "32:16", /* loop_align.  */
> -  2, /* int_reassoc_width.  */
> -  4, /* fp_reassoc_width.  */
> -  1, /* fma_reassoc_width.  */
> -  2, /* vec_reassoc_width.  */
> -  2, /* min_div_recip_mul_sf.  */
> -  2, /* min_div_recip_mul_df.  */
> -  0, /* max_case_values.  */
> -  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
> -  (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags.  */
> +  "32:16",     /* function_align.  */
> +  "4",         /* jump_align.  */
> +  "32:16",     /* loop_align.  */
> +  2,   /* int_reassoc_width.  */
> +  4,   /* fp_reassoc_width.  */
> +  1,   /* fma_reassoc_width.  */
> +  2,   /* vec_reassoc_width.  */
> +  2,   /* min_div_recip_mul_sf.  */
> +  2,   /* min_div_recip_mul_df.  */
> +  0,   /* max_case_values.  */
> +  tune_params::AUTOPREFETCHER_WEAK,    /* autoprefetcher_model.  */
> +  (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND),     /* tune_flags.  */
>   &generic_prefetch_tune
> };
> 
> @@ -2293,6 +2293,135 @@ static const struct tune_params
> neoverse512tvb_tunings =
>   &generic_prefetch_tune
> };
> 
> +static const struct cpu_addrcost_table neoversen1_addrcost_table = {
> +    {
> +      0, /* hi  */
> +      0, /* si  */
> +      0, /* di  */
> +      1, /* ti  */
> +    },
> +  0, /* pre_modify  */
> +  0, /* post_modify  */
> +  1, /* post_modify_ld3_st3  */
> +  1, /* post_modify_ld4_st4  */
> +  0, /* register_offset  */
> +  0, /* register_sextend  */
> +  0, /* register_zextend  */
> +  0 /* imm_offset  */
> +};
> +
> +static const struct cpu_regmove_cost neoversen1_regmove_cost = {
> +  1, /* GP2GP  */
> +  /* Avoid the use of slow int<->fp moves for spilling by setting
> +     their cost higher than memmov_cost.  */
> +  3, /* GP2FP  */
> +  2, /* FP2GP  */
> +  2 /* FP2FP  */
> +};
> +
> +static const advsimd_vec_cost neoversen1_advsimd_vector_cost = {
> +  2, /* int_stmt_cost  */
> +  2, /* fp_stmt_cost  */
> +  0, /* ld2_st2_permute_cost  */
> +  0, /* ld3_st3_permute_cost  */
> +  0, /* ld4_st4_permute_cost  */
> +  3, /* permute_cost  */
> +  6, /* reduc_i8_cost  */
> +  5, /* reduc_i16_cost  */
> +  3, /* reduc_i32_cost  */
> +  3, /* reduc_i64_cost  */
> +  8, /* reduc_f16_cost  */
> +  5, /* reduc_f32_cost  */
> +  5, /* reduc_f64_cost  */
> +  0, /* store_elt_extra_cost  */
> +  2, /* vec_to_scalar_cost  */
> +  2, /* scalar_to_vec_cost  */
> +  4, /* align_load_cost  */
> +  4, /* unalign_load_cost  */
> +  1, /* unalign_store_cost  */
> +  1  /* store_cost  */
> +};
> +
> +static const aarch64_scalar_vec_issue_info neoversen1_scalar_issue_info
> += {
> +  2, /* loads_stores_per_cycle  */
> +  2, /* stores_per_cycle  */
> +  2, /* general_ops_per_cycle  */
> +  0, /* fp_simd_load_general_ops  */
> +  1 /* fp_simd_store_general_ops  */
> +};
> +
> +static const aarch64_advsimd_vec_issue_info
> +neoversen1_advsimd_issue_info = {
> +  {
> +    2, /* loads_stores_per_cycle  */
> +    2, /* stores_per_cycle  */
> +    2, /* general_ops_per_cycle  */
> +    0, /* fp_simd_load_general_ops  */
> +    1 /* fp_simd_store_general_ops  */
> +  },
> +  3, /* ld2_st2_general_ops  */
> +  5, /* ld3_st3_general_ops  */
> +  11 /* ld4_st4_general_ops  */
> +};
> +
> +static const aarch64_vec_issue_info neoversen1_vec_issue_info = {
> +  &neoversen1_scalar_issue_info, /* scalar  */
> +  &neoversen1_advsimd_issue_info, /* advsimd  */
> +  nullptr /* sve  */
> +};
> +
> +
> +static const struct cpu_vector_cost neoversen1_vector_cost = {
> +  1, /* scalar_int_stmt_cost  */
> +  1, /* scalar_fp_stmt_cost  */
> +  4, /* scalar_load_cost  */
> +  1, /* scalar_store_cost  */
> +  1, /* cond_taken_branch_cost  */
> +  1, /* cond_not_taken_branch_cost  */
> +  &neoversen1_advsimd_vector_cost, /* advsimd  */
> +  nullptr, /* sve  */
> +  &neoversen1_vec_issue_info /* issue_info  */ };
> +
> +static const struct tune_params neoversen1_tunings = {
> +  &neoversen1_extra_costs,
> +  &neoversen1_addrcost_table,
> +  &neoversen1_regmove_cost,
> +  &neoversen1_vector_cost,
> +  &generic_branch_cost,
> +  &generic_approx_modes,
> +  SVE_NOT_IMPLEMENTED, /* sve_width  */
> +  { 4, /* load_int.  */
> +    2, /* store_int.  */
> +    5, /* load_fp.  */
> +    2, /* store_fp.  */
> +    4, /* load_pred.  */
> +    4 /* store_pred.  */
> +  }, /* memmov_cost.  */
> +  4, /* issue_rate  */
> +  AARCH64_FUSE_AES_AESMC, /* fusible_ops  */
> +  "32:16", /* function_align.  */
> +  "4", /* jump_align.  */
> +  "32:16", /* loop_align.  */
> +  2, /* int_reassoc_width.  */
> +  4, /* fp_reassoc_width.  */
> +  1, /* fma_reassoc_width.  */
> +  2, /* vec_reassoc_width.  */
> +  2, /* min_div_recip_mul_sf.  */
> +  2, /* min_div_recip_mul_df.  */
> +  0, /* max_case_values.  */
> +  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
> +  AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND, /* tune_flags.  */
> +  &generic_prefetch_tune
> +};
> +
> static const advsimd_vec_cost neoversen2_advsimd_vector_cost = {
>   2, /* int_stmt_cost  */
> diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-
> tables.h
> index e3848214728..fce6da6bbcc 100644
> --- a/gcc/config/arm/aarch-cost-tables.h
> +++ b/gcc/config/arm/aarch-cost-tables.h
> @@ -450,6 +450,113 @@ const struct cpu_cost_table cortexa76_extra_costs
> =
>   }
> };
> 
> +const struct cpu_cost_table neoversen1_extra_costs = {
> +  /* ALU */
> +  {
> +    0,                 /* arith.  */
> +    0,                 /* logical.  */
> +    0,                 /* shift.  */
> +    0,                 /* shift_reg.  */
> +    COSTS_N_INSNS (1), /* arith_shift.  */
> +    COSTS_N_INSNS (1), /* arith_shift_reg.  */
> +    0,       /* log_shift.  */
> +    COSTS_N_INSNS (1), /* log_shift_reg.  */
> +    0,                 /* extend.  */
> +    COSTS_N_INSNS (1), /* extend_arith.  */
> +    COSTS_N_INSNS (1), /* bfi.  */
> +    0,                 /* bfx.  */
> +    0,                 /* clz.  */
> +    0,                 /* rev.  */
> +    0,                 /* non_exec.  */
> +    true               /* non_exec_costs_exec.  */
> +  },
> +  {
> +    /* MULT SImode */
> +    {
> +      COSTS_N_INSNS (1),       /* simple.  */
> +      COSTS_N_INSNS (2),       /* flag_setting.  */
> +      COSTS_N_INSNS (1),       /* extend.  */
> +      COSTS_N_INSNS (1),       /* add.  */
> +      COSTS_N_INSNS (1),       /* extend_add.  */
> +      COSTS_N_INSNS (11)       /* idiv.  */
> +    },
> +    /* MULT DImode */
> +    {
> +      COSTS_N_INSNS (3),       /* simple.  */
> +      0,                       /* flag_setting (N/A).  */
> +      COSTS_N_INSNS (1),       /* extend.  */
> +      COSTS_N_INSNS (3),       /* add.  */
> +      COSTS_N_INSNS (1),       /* extend_add.  */
> +      COSTS_N_INSNS (19)       /* idiv.  */
> +    }
> +  },
> +  /* LD/ST */
> +  {
> +    COSTS_N_INSNS (3),         /* load.  */
> +    COSTS_N_INSNS (3),         /* load_sign_extend.  */
> +    COSTS_N_INSNS (3),         /* ldrd.  */
> +    COSTS_N_INSNS (2),         /* ldm_1st.  */
> +    1,                         /* ldm_regs_per_insn_1st.  */
> +    2,                         /* ldm_regs_per_insn_subsequent.  */
> +    COSTS_N_INSNS (4),         /* loadf.  */
> +    COSTS_N_INSNS (4),         /* loadd.  */
> +    COSTS_N_INSNS (3),         /* load_unaligned.  */
> +    0,                         /* store.  */
> +    0,                         /* strd.  */
> +    0,                         /* stm_1st.  */
> +    1,                         /* stm_regs_per_insn_1st.  */
> +    2,                         /* stm_regs_per_insn_subsequent.  */
> +    0,                         /* storef.  */
> +    0,                         /* stored.  */
> +    COSTS_N_INSNS (1),         /* store_unaligned.  */
> +    COSTS_N_INSNS (1),         /* loadv.  */
> +    COSTS_N_INSNS (1)          /* storev.  */
> +  },
> +  {
> +    /* FP SFmode */
> +    {
> +      COSTS_N_INSNS (9),       /* div.  */
> +      COSTS_N_INSNS (2),       /* mult.  */
> +      COSTS_N_INSNS (3),       /* mult_addsub.  */
> +      COSTS_N_INSNS (3),       /* fma.  */
> +      COSTS_N_INSNS (1),       /* addsub.  */
> +      COSTS_N_INSNS (1),       /* fpconst.  */
> +      0,                       /* neg.  */
> +      0,                       /* compare.  */
> +      COSTS_N_INSNS (1),       /* widen.  */
> +      COSTS_N_INSNS (1),       /* narrow.  */
> +      COSTS_N_INSNS (1),       /* toint.  */
> +      COSTS_N_INSNS (1),       /* fromint.  */
> +      COSTS_N_INSNS (1)        /* roundint.  */
> +    },
> +    /* FP DFmode */
> +    {
> +      COSTS_N_INSNS (14),      /* div.  */
> +      COSTS_N_INSNS (2),       /* mult.  */
> +      COSTS_N_INSNS (3),       /* mult_addsub.  */
> +      COSTS_N_INSNS (3),       /* fma.  */
> +      COSTS_N_INSNS (1),       /* addsub.  */
> +      COSTS_N_INSNS (1),       /* fpconst.  */
> +      0,                       /* neg.  */
> +      0,                       /* compare.  */
> +      COSTS_N_INSNS (1),       /* widen.  */
> +      COSTS_N_INSNS (1),       /* narrow.  */
> +      COSTS_N_INSNS (1),       /* toint.  */
> +      COSTS_N_INSNS (1),       /* fromint.  */
> +      COSTS_N_INSNS (1)        /* roundint.  */
> +    }
> +  },
> +  /* Vector */
> +  {
> +    COSTS_N_INSNS (1),  /* alu.  */
> +    COSTS_N_INSNS (4),  /* mult.  */
> +    COSTS_N_INSNS (1),  /* movi.  */
> +    COSTS_N_INSNS (1),  /* dup.  */
> +    COSTS_N_INSNS (1)   /* extract.  */
> +  }
> +};
> +
> const struct cpu_cost_table exynosm1_extra_costs = {
>   /* ALU */
> --
> 2.39.2 (Apple Git-143)
> 
> 
> 
> 
> --
> Evandro Menezes ◊ evan...@yahoo.com ◊ Austin, TX
> Άγιος ο Θεός ⁂ ܩܕܝܫܐ ܐܢ̱ܬ ܠܐ ܡܝܘܬܐ ⁂ Sanctus Deus

RE: [PATCH] aarch64: Add the cost model for Neoverse N1

Reply via email to