On 29/10/14 12:55, Wilco Dijkstra wrote: > This patch adds the TARGET_SCHED_REASSOCIATION_WIDTH hook. Separate settings > for integer, floating > point and vector modes are supported via the CPU tuning parameters. Setting > the FP reassociation > width to 4 improves FP performance on SPEC2000 by ~1.3%. > > OK for commit? > > ChangeLog: > 2014-10-29 Wilco Dijkstra <wdijk...@arm.com> > > * gcc/config/aarch64/aarch64-protos.h (tune_params): > Add reassociation tuning parameters. > * gcc/config/aarch64/aarch64.c (TARGET_SCHED_REASSOCIATION_WIDTH): > Define. (aarch64_reassociation_width): New function. > (generic_tunings): Add reassociation tuning parameters. > (cortexa53_tunings): Likewise. > (cortexa57_tunings): Likewise. > (thunderx_tunings): Likewise. >
If all cores seem to benefit from FP reassociation set to 4, then it seems odd that 4 is not also the default for generic. Andrew, you may need to pick a target-specific value for ThunderX; I think Wilco has just picked something that seems plausible because he needs to put a real value in there. What happens if the integer and vector numbers are bumped up? I'd have thought that integer numbers >1 would be appropriate on all dual-issue or greater cores. R. > --- > gcc/config/aarch64/aarch64-protos.h | 3 +++ > gcc/config/aarch64/aarch64.c | 34 +++++++++++++++++++++++++++++++--- > 2 files changed, 34 insertions(+), 3 deletions(-) > > diff --git a/gcc/config/aarch64/aarch64-protos.h > b/gcc/config/aarch64/aarch64-protos.h > index 810644c..9c03f7b 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -170,6 +170,9 @@ struct tune_params > const struct cpu_vector_cost *const vec_costs; > const int memmov_cost; > const int issue_rate; > + const int int_reassoc_width; > + const int fp_reassoc_width; > + const int vec_reassoc_width; > }; > > HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index e6cd5eb..4d67722 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -309,7 +309,10 @@ static const struct tune_params generic_tunings = > &generic_regmove_cost, > &generic_vector_cost, > NAMED_PARAM (memmov_cost, 4), > - NAMED_PARAM (issue_rate, 2) > + NAMED_PARAM (issue_rate, 2), > + 1, /* int_reassoc_width. */ > + 1, /* fp_reassoc_width. */ > + 1 /* vec_reassoc_width. */ > }; > > static const struct tune_params cortexa53_tunings = > @@ -319,7 +322,10 @@ static const struct tune_params cortexa53_tunings = > &cortexa53_regmove_cost, > &generic_vector_cost, > NAMED_PARAM (memmov_cost, 4), > - NAMED_PARAM (issue_rate, 2) > + NAMED_PARAM (issue_rate, 2), > + 1, /* int_reassoc_width. 
*/ > + 4, /* fp_reassoc_width. */ > + 1 /* vec_reassoc_width. */ > }; > > static const struct tune_params cortexa57_tunings = > @@ -329,7 +335,10 @@ static const struct tune_params cortexa57_tunings = > &cortexa57_regmove_cost, > &cortexa57_vector_cost, > NAMED_PARAM (memmov_cost, 4), > - NAMED_PARAM (issue_rate, 3) > + NAMED_PARAM (issue_rate, 3), > + 1, /* int_reassoc_width. */ > + 4, /* fp_reassoc_width. */ > + 1 /* vec_reassoc_width. */ > }; > > static const struct tune_params thunderx_tunings = > @@ -340,6 +349,9 @@ static const struct tune_params thunderx_tunings = > &generic_vector_cost, > NAMED_PARAM (memmov_cost, 6), > NAMED_PARAM (issue_rate, 2) > + 1, /* int_reassoc_width. */ > + 4, /* fp_reassoc_width. */ > + 1 /* vec_reassoc_width. */ > }; > > /* A processor implementing AArch64. */ > @@ -429,6 +441,19 @@ static const char * const aarch64_condition_codes[] = > "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" > }; > > +static int > +aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED, > + enum machine_mode mode) > +{ > + if (VECTOR_MODE_P (mode)) > + return aarch64_tune_params->vec_reassoc_width; > + if (INTEGRAL_MODE_P (mode)) > + return aarch64_tune_params->int_reassoc_width; > + if (FLOAT_MODE_P (mode)) > + return aarch64_tune_params->fp_reassoc_width; > + return 1; > +} > + > /* Provide a mapping from gcc register numbers to dwarf register numbers. */ > unsigned > aarch64_dbx_register_number (unsigned regno) > @@ -10147,6 +10172,9 @@ aarch64_asan_shadow_offset (void) > #undef TARGET_PREFERRED_RELOAD_CLASS > #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class > > +#undef TARGET_SCHED_REASSOCIATION_WIDTH > +#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width > + > #undef TARGET_SECONDARY_RELOAD > #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload > >