Hi Kyrill,

On 2018/5/23 16:08, Kyrill Tkachov wrote:
> 
> On 23/05/18 05:54, Zhangshaokun wrote:
>> Hi Kyrill,
>>
>> On 2018/5/22 18:52, Kyrill Tkachov wrote:
>>> Hi Shaokun,
>>>
>>> On 22/05/18 09:40, Shaokun Zhang wrote:
>>>> This patch adds a HiSilicon mcpu: tsv110.
>>>>
>>>> ---
>>>>   gcc/ChangeLog                            |   9 +++
>>>>   gcc/config/aarch64/aarch64-cores.def     |   5 ++
>>>>   gcc/config/aarch64/aarch64-cost-tables.h | 103 
>>>> +++++++++++++++++++++++++++++++
>>>>   gcc/config/aarch64/aarch64-tune.md       |   2 +-
>>>>   gcc/config/aarch64/aarch64.c             |  79 ++++++++++++++++++++++++
>>>>   gcc/doc/invoke.texi                      |   2 +-
>>>>   6 files changed, 198 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
>>>> index cec2892..5d44966 100644
>>>> --- a/gcc/ChangeLog
>>>> +++ b/gcc/ChangeLog
>>>> @@ -1,3 +1,12 @@
>>>> +2018-05-22  Shaokun Zhang <zhangshao...@hisilicon.com>
>>>> +            Bo Zhou  <zbo.z...@hisilicon.com>
>>>> +
>>>> +       * config/aarch64/aarch64-cores.def (tsv110): New CPU.
>>>> +       * config/aarch64/aarch64-tune.md: Regenerated.
>>>> +       * doc/invoke.texi (AArch61 Options/-mtune): Add "tsv110".
>>> typo: AArch64.
>>>
>> Good catch, my mistake.
>>
>>>> +       * gcc/config/aarch64/aarch64.c (tsv110_tunings): New tuning table.
>>>> +       * gcc/config/aarch64/aarch64-cost-tables.h: Add "tsv110" extra 
>>>> costs.
>>> Please start the path with config/.
>>>
>> Sure, I will remove gcc/ in the next version.
>>
>>>> +
>>>>   2018-05-21  Michael Meissner <meiss...@linux.ibm.com>
>>>>
>>>>           PR target/85657
>>>> diff --git a/gcc/config/aarch64/aarch64-cores.def 
>>>> b/gcc/config/aarch64/aarch64-cores.def
>>>> index 33b96ca..db7a412 100644
>>>> --- a/gcc/config/aarch64/aarch64-cores.def
>>>> +++ b/gcc/config/aarch64/aarch64-cores.def
>>>> @@ -91,6 +91,11 @@ AARCH64_CORE("cortex-a75",  cortexa75, cortexa57, 8_2A, 
>>>>  AARCH64_FL_FOR_ARCH8_2
>>>>   /* Qualcomm ('Q') cores. */
>>>>   AARCH64_CORE("saphira",     saphira,    falkor,    8_3A, 
>>>> AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira,   
>>>> 0x51, 0xC01, -1)
>>>>
>>>> +/* ARMv8.4-A Architecture Processors.  */
>>>> +
>>>> +/* HiSilicon ('H') cores. */
>>>> +AARCH64_CORE("tsv110",     tsv110,    tsv110,    8_4A, 
>>>> AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | 
>>>> AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110,   0x48, 0xd01, -1)
>>>> +
>>> The third field is the scheduler model to use when optimising.
>>> Since there is no tsv110 scheduling model, using the name "tsv110"
>>> in the third field will generally give pretty poor schedules.
>>> I recommend you specify a scheduling model that most closely matches your 
>>> core
>>> for the time being. But I don't think it's required and I wouldn't let it 
>>> hold
>> I checked it again; cortexa57 most closely matches tsv110. Thanks for your
>> suggestion.
>> If I choose cortexa57, can I still add tsv110_tunings, which will use tsv110's
>> pipeline features as in the rest of the patch below, or should I only use the generic features?
> 
> If you use cortexa57 for the scheduling model (the 3rd field) you should still
> use tsv110_tunings in the 6th field as this will specify other important 
> parameters
> like instruction selection costs, fusion capabilities, alignment requirements 
> etc.
> 

Thanks for your comments. I will wait for the other maintainers' comments and prepare the next 
version.
One more question: any thoughts on the issue in my cover letter about skipping DC CVAU for
HiSilicon tsv110 when syncing the icache and dcache?

Thanks,
Shaokun

> Thanks,
> Kyrill
> 
>>
>>> up the patch.
>>>
>>> You'll need approval from an aarch64 maintainer (cc'ed some for you).
>>>
>> Good, thanks for your nice guidance.
>>
>> Thanks,
>> Shaokun
>>
>>> Thanks,
>>> Kyrill
>>>
>>>>   /* ARMv8-A big.LITTLE implementations.  */
>>>>
>>>>   AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 
>>>> 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 
>>>> AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
>>>> diff --git a/gcc/config/aarch64/aarch64-cost-tables.h 
>>>> b/gcc/config/aarch64/aarch64-cost-tables.h
>>>> index a455c62..b6890d6 100644
>>>> --- a/gcc/config/aarch64/aarch64-cost-tables.h
>>>> +++ b/gcc/config/aarch64/aarch64-cost-tables.h
>>>> @@ -334,4 +334,107 @@ const struct cpu_cost_table thunderx2t99_extra_costs 
>>>> =
>>>>     }
>>>>   };
>>>>
>>>> +const struct cpu_cost_table tsv110_extra_costs =
>>>> +{
>>>> +  /* ALU */
>>>> +  {
>>>> +    0,                 /* arith.  */
>>>> +    0,                 /* logical.  */
>>>> +    0,                 /* shift.  */
>>>> +    0,                 /* shift_reg.  */
>>>> +    COSTS_N_INSNS (1), /* arith_shift.  */
>>>> +    COSTS_N_INSNS (1), /* arith_shift_reg.  */
>>>> +    COSTS_N_INSNS (1), /* log_shift.  */
>>>> +    COSTS_N_INSNS (1), /* log_shift_reg.  */
>>>> +    0,                 /* extend.  */
>>>> +    COSTS_N_INSNS (1), /* extend_arith.  */
>>>> +    0,                 /* bfi.  */
>>>> +    0,                 /* bfx.  */
>>>> +    0,                 /* clz.  */
>>>> +    0,                /* rev.  */
>>>> +    0,                 /* non_exec.  */
>>>> +    true               /* non_exec_costs_exec.  */
>>>> +  },
>>>> +  {
>>>> +    /* MULT SImode */
>>>> +    {
>>>> +      COSTS_N_INSNS (2),       /* simple.  */
>>>> +      COSTS_N_INSNS (2),       /* flag_setting.  */
>>>> +      COSTS_N_INSNS (2),       /* extend.  */
>>>> +      COSTS_N_INSNS (2),       /* add.  */
>>>> +      COSTS_N_INSNS (2),       /* extend_add.  */
>>>> +      COSTS_N_INSNS (11)       /* idiv.  */
>>>> +    },
>>>> +    /* MULT DImode */
>>>> +    {
>>>> +      COSTS_N_INSNS (3),       /* simple.  */
>>>> +      0,                       /* flag_setting (N/A).  */
>>>> +      COSTS_N_INSNS (3),       /* extend.  */
>>>> +      COSTS_N_INSNS (3),       /* add.  */
>>>> +      COSTS_N_INSNS (3),       /* extend_add.  */
>>>> +      COSTS_N_INSNS (19)       /* idiv.  */
>>>> +    }
>>>> +  },
>>>> +  /* LD/ST */
>>>> +  {
>>>> +    COSTS_N_INSNS (3),         /* load.  */
>>>> +    COSTS_N_INSNS (4),         /* load_sign_extend.  */
>>>> +    COSTS_N_INSNS (3),         /* ldrd.  */
>>>> +    COSTS_N_INSNS (3),         /* ldm_1st.  */
>>>> +    1,                         /* ldm_regs_per_insn_1st. */
>>>> +    2,                         /* ldm_regs_per_insn_subsequent.  */
>>>> +    COSTS_N_INSNS (4),         /* loadf.  */
>>>> +    COSTS_N_INSNS (4),         /* loadd.  */
>>>> +    COSTS_N_INSNS (4),         /* load_unaligned.  */
>>>> +    0,                         /* store.  */
>>>> +    0,                         /* strd.  */
>>>> +    0,                         /* stm_1st.  */
>>>> +    1,                         /* stm_regs_per_insn_1st. */
>>>> +    2,                         /* stm_regs_per_insn_subsequent.  */
>>>> +    0,                         /* storef.  */
>>>> +    0,                         /* stored.  */
>>>> +    COSTS_N_INSNS (1),         /* store_unaligned.  */
>>>> +    COSTS_N_INSNS (4),         /* loadv.  */
>>>> +    COSTS_N_INSNS (4)          /* storev.  */
>>>> +  },
>>>> +  {
>>>> +    /* FP SFmode */
>>>> +    {
>>>> +      COSTS_N_INSNS (10),      /* div.  */
>>>> +      COSTS_N_INSNS (4),       /* mult.  */
>>>> +      COSTS_N_INSNS (4),       /* mult_addsub.  */
>>>> +      COSTS_N_INSNS (4),       /* fma.  */
>>>> +      COSTS_N_INSNS (4),       /* addsub.  */
>>>> +      COSTS_N_INSNS (1),       /* fpconst.  */
>>>> +      COSTS_N_INSNS (1),       /* neg.  */
>>>> +      COSTS_N_INSNS (1),       /* compare.  */
>>>> +      COSTS_N_INSNS (2),       /* widen.  */
>>>> +      COSTS_N_INSNS (2),       /* narrow.  */
>>>> +      COSTS_N_INSNS (2),       /* toint.  */
>>>> +      COSTS_N_INSNS (1),       /* fromint.  */
>>>> +      COSTS_N_INSNS (2)        /* roundint.  */
>>>> +    },
>>>> +    /* FP DFmode */
>>>> +    {
>>>> +      COSTS_N_INSNS (17),      /* div.  */
>>>> +      COSTS_N_INSNS (4),       /* mult.  */
>>>> +      COSTS_N_INSNS (6),       /* mult_addsub.  */
>>>> +      COSTS_N_INSNS (6),       /* fma.  */
>>>> +      COSTS_N_INSNS (3),       /* addsub.  */
>>>> +      COSTS_N_INSNS (1),       /* fpconst.  */
>>>> +      COSTS_N_INSNS (1),       /* neg.  */
>>>> +      COSTS_N_INSNS (1),       /* compare.  */
>>>> +      COSTS_N_INSNS (2),       /* widen.  */
>>>> +      COSTS_N_INSNS (2),       /* narrow.  */
>>>> +      COSTS_N_INSNS (2),       /* toint.  */
>>>> +      COSTS_N_INSNS (1),       /* fromint.  */
>>>> +      COSTS_N_INSNS (2)        /* roundint.  */
>>>> +    }
>>>> +  },
>>>> +  /* Vector */
>>>> +  {
>>>> +    COSTS_N_INSNS (1)  /* alu.  */
>>>> +  }
>>>> +};
>>>> +
>>>>   #endif
>>>> diff --git a/gcc/config/aarch64/aarch64-tune.md 
>>>> b/gcc/config/aarch64/aarch64-tune.md
>>>> index 7b3a746..a10f2e7 100644
>>>> --- a/gcc/config/aarch64/aarch64-tune.md
>>>> +++ b/gcc/config/aarch64/aarch64-tune.md
>>>> @@ -1,5 +1,5 @@
>>>>   ;; -*- buffer-read-only: t -*-
>>>>   ;; Generated automatically by gentune.sh from aarch64-cores.def
>>>>   (define_attr "tune"
>>>> - 
>>>> "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
>>>> + 
>>>> "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,saphira,tsv110,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
>>>>           (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
>>>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>>>> index 6bf6c05..0788c14 100644
>>>> --- a/gcc/config/aarch64/aarch64.c
>>>> +++ b/gcc/config/aarch64/aarch64.c
>>>> @@ -266,6 +266,22 @@ static const struct cpu_addrcost_table 
>>>> generic_addrcost_table =
>>>>     0 /* imm_offset  */
>>>>   };
>>>>
>>>> +static const struct cpu_addrcost_table tsv110_addrcost_table =
>>>> +{
>>>> +    {
>>>> +      1, /* hi  */
>>>> +      0, /* si  */
>>>> +      0, /* di  */
>>>> +      1, /* ti  */
>>>> +    },
>>>> +  0, /* pre_modify  */
>>>> +  0, /* post_modify  */
>>>> +  0, /* register_offset  */
>>>> +  1, /* register_sextend  */
>>>> +  1, /* register_zextend  */
>>>> +  0 /* imm_offset  */
>>>> +};
>>>> +
>>>>   static const struct cpu_addrcost_table exynosm1_addrcost_table =
>>>>   {
>>>>       {
>>>> @@ -344,6 +360,16 @@ static const struct cpu_regmove_cost 
>>>> cortexa53_regmove_cost =
>>>>     2 /* FP2FP  */
>>>>   };
>>>>
>>>> +static const struct cpu_regmove_cost tsv110_regmove_cost =
>>>> +{
>>>> +  1, /* GP2GP  */
>>>> +  /* Avoid the use of slow int<->fp moves for spilling by setting
>>>> +     their cost higher than memmov_cost.  */
>>>> +  2, /* GP2FP  */
>>>> +  3, /* FP2GP  */
>>>> +  2  /* FP2FP  */
>>>> +};
>>>> +
>>>>   static const struct cpu_regmove_cost exynosm1_regmove_cost =
>>>>   {
>>>>     1, /* GP2GP  */
>>>> @@ -450,6 +476,25 @@ static const struct cpu_vector_cost 
>>>> cortexa57_vector_cost =
>>>>     1 /* cond_not_taken_branch_cost  */
>>>>   };
>>>>
>>>> +static const struct cpu_vector_cost tsv110_vector_cost =
>>>> +{
>>>> +  1, /* scalar_int_stmt_cost  */
>>>> +  1, /* scalar_fp_stmt_cost  */
>>>> +  5, /* scalar_load_cost  */
>>>> +  1, /* scalar_store_cost  */
>>>> +  2, /* vec_int_stmt_cost  */
>>>> +  2, /* vec_fp_stmt_cost  */
>>>> +  2, /* vec_permute_cost  */
>>>> +  3, /* vec_to_scalar_cost  */
>>>> +  2, /* scalar_to_vec_cost  */
>>>> +  5, /* vec_align_load_cost  */
>>>> +  5, /* vec_unalign_load_cost  */
>>>> +  1, /* vec_unalign_store_cost  */
>>>> +  1, /* vec_store_cost  */
>>>> +  1, /* cond_taken_branch_cost  */
>>>> +  1 /* cond_not_taken_branch_cost  */
>>>> +};
>>>> +
>>>>   static const struct cpu_vector_cost exynosm1_vector_cost =
>>>>   {
>>>>     1, /* scalar_int_stmt_cost  */
>>>> @@ -550,6 +595,15 @@ static const cpu_prefetch_tune generic_prefetch_tune =
>>>>     -1                   /* default_opt_level  */
>>>>   };
>>>>
>>>> +static const cpu_prefetch_tune tsv110_prefetch_tune =
>>>> +{
>>>> +  0,                   /* num_slots  */
>>>> +  64,                  /* l1_cache_size  */
>>>> +  64,                  /* l1_cache_line_size  */
>>>> +  512,                 /* l2_cache_size  */
>>>> +  -1                   /* default_opt_level  */
>>>> +};
>>>> +
>>>>   static const cpu_prefetch_tune exynosm1_prefetch_tune =
>>>>   {
>>>>     0,                   /* num_slots  */
>>>> @@ -751,6 +805,31 @@ static const struct tune_params cortexa73_tunings =
>>>>   };
>>>>
>>>>
>>>> +static const struct tune_params tsv110_tunings =
>>>> +{
>>>> +  &tsv110_extra_costs,
>>>> +  &tsv110_addrcost_table,
>>>> +  &tsv110_regmove_cost,
>>>> +  &tsv110_vector_cost,
>>>> +  &generic_branch_cost,
>>>> +  &generic_approx_modes,
>>>> +  4, /* memmov_cost  */
>>>> +  4, /* issue_rate  */
>>>> +  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
>>>> +   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
>>>> +  16,  /* function_align.  */
>>>> +  4,   /* jump_align.  */
>>>> +  8,   /* loop_align.  */
>>>> +  2,   /* int_reassoc_width.  */
>>>> +  4,   /* fp_reassoc_width.  */
>>>> +  1,   /* vec_reassoc_width.  */
>>>> +  2,   /* min_div_recip_mul_sf.  */
>>>> +  2,   /* min_div_recip_mul_df.  */
>>>> +  0,   /* max_case_values.  */
>>>> +  tune_params::AUTOPREFETCHER_WEAK,    /* autoprefetcher_model.  */
>>>> +  (AARCH64_EXTRA_TUNE_NONE),   /* tune_flags.  */
>>>> +  &tsv110_prefetch_tune
>>>> +};
>>>>
>>>>   static const struct tune_params exynosm1_tunings =
>>>>   {
>>>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>>>> index beba295..55fcd42 100644
>>>> --- a/gcc/doc/invoke.texi
>>>> +++ b/gcc/doc/invoke.texi
>>>> @@ -14713,7 +14713,7 @@ performance of the code. Permissible values for 
>>>> this option are:
>>>>   @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
>>>>   @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, 
>>>> @samp{cortex-a75},
>>>>   @samp{exynos-m1}, @samp{falkor}, @samp{qdf24xx}, @samp{saphira},
>>>> -@samp{xgene1}, @samp{vulcan}, @samp{thunderx},
>>>> +@samp{xgene1}, @samp{vulcan}, @samp{thunderx}, @samp{tsv110},
>>>>   @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
>>>>   @samp{thunderxt83}, @samp{thunderx2t99}, @samp{cortex-a57.cortex-a53},
>>>>   @samp{cortex-a72.cortex-a53}, @samp{cortex-a73.cortex-a35},
>>>> -- 
>>>> 2.7.4
>>>>
>>>
>>> .
>>>
> 
> 
> .
> 

Reply via email to