Hi! This patch adds a new field to tune_params to indicate whether LDRD/STRD are preferred over PUSH/POP in the prologue/epilogue of a specific core. It also creates a new tune for Cortex-A15 and updates the tunes for the other cores to set the new field to its default value.
Changelog entry for Patch to create tune for cortex-a15: 2011-10-11 Sameera Deshpande <sameera.deshpa...@arm.com> * config/arm/arm-cores.def (cortex_a15): Update. * config/arm/arm-protos.h (struct tune_params): Add new field... (prefer_ldrd_strd): ... this. * config/arm/arm.c (arm_slowmul_tune): Add prefer_ldrd_strd field settings. (arm_fastmul_tune): Likewise. (arm_strongarm_tune): Likewise. (arm_xscale_tune): Likewise. (arm_9e_tune): Likewise. (arm_v6t2_tune): Likewise. (arm_cortex_tune): Likewise. (arm_cortex_a5_tune): Likewise. (arm_cortex_a9_tune): Likewise. (arm_fa726te_tune): Likewise. (arm_cortex_a15_tune): New variable. -- On Tue, 2011-10-11 at 10:08 +0100, Sameera Deshpande wrote: > This series of 5 patches generate LDRD/STRD instead of POP/PUSH in > epilogue/prologue for ARM and Thumb-2 mode of A15. > > Patch [1/5] introduces new field in tune which can be used to indicate > whether LDRD/STRD are preferred over POP/PUSH by the specific core. > > Patches [2-5/5] use this field to determine if LDRD/STRD can be > generated instead of PUSH/POP in ARM and Thumb-2 mode. > > Patch [2/5] generates LDRD instead of POP for Thumb-2 epilogue in A15. > This patch depends on patch [1/5]. > > Patch [3/5] generates STRD instead of PUSH for Thumb-2 prologue in A15. > This patch depends for variables, functions and patterns defined in > [1/5] and [2/5]. > > Patch [4/5] generates STRD instead of PUSH for ARM prologue in A15. This > patch depends on [1/5]. > > Patch [5/5] generates LDRD instead of POP for ARM epilogue in A15. This > patch depends for variables, functions and patterns defined in [1/5] and > [4/5]. > > All these patches depend upon the Thumb2/ARM RTL epilogue patches > http://gcc.gnu.org/ml/gcc-patches/2011-09/msg01854.html, > http://gcc.gnu.org/ml/gcc-patches/2011-09/msg01855.html submitted for > review. > > All these patches are applied in given order and tested with check-gcc, > check-gdb and bootstrap without regression. 
> > In case of ARM mode, a significant performance improvement can be seen on > some parts of a popular embedded consumer benchmark (~26%). > However, in most cases, not much effect is seen on performance > (~3% improvement). > > In case of Thumb-2, the performance improvement observed on the same parts > of the benchmark is ~11% (2.5% improvement). >
diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def index 742b5e8..1b42713 100644 --- a/gcc/config/arm/arm-cores.def +++ b/gcc/config/arm/arm-cores.def @@ -128,7 +128,7 @@ ARM_CORE("generic-armv7-a", genericv7a, 7A, FL_LDSCHED, cortex) ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5) ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex) ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) -ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex) +ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex) ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex) ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index f69bc42..c6b8f71 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -243,6 +243,9 @@ struct tune_params int l1_cache_line_size; bool prefer_constant_pool; int (*branch_cost) (bool, bool); + /* This flag indicates if STRD/LDRD instructions are preferred + over PUSH/POP in epilogue/prologue. */ + bool prefer_ldrd_strd; }; extern const struct tune_params *current_tune; diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 6c09267..d709375 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -850,7 +850,8 @@ const struct tune_params arm_slowmul_tune = 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer LDRD/STRD. */ }; const struct tune_params arm_fastmul_tune = @@ -861,7 +862,8 @@ const struct tune_params arm_fastmul_tune = 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer LDRD/STRD. 
*/ }; /* StrongARM has early execution of branches, so a sequence that is worth @@ -875,7 +877,8 @@ const struct tune_params arm_strongarm_tune = 3, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer LDRD/STRD. */ }; const struct tune_params arm_xscale_tune = @@ -886,7 +889,8 @@ const struct tune_params arm_xscale_tune = 3, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer LDRD/STRD. */ }; const struct tune_params arm_9e_tune = @@ -897,7 +901,8 @@ const struct tune_params arm_9e_tune = 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer LDRD/STRD. */ }; const struct tune_params arm_v6t2_tune = @@ -908,7 +913,8 @@ const struct tune_params arm_v6t2_tune = 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, false, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer LDRD/STRD. */ }; /* Generic Cortex tuning. Use more specific tunings if appropriate. */ @@ -920,7 +926,20 @@ const struct tune_params arm_cortex_tune = 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, false, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer LDRD/STRD. */ +}; + +const struct tune_params arm_cortex_a15_tune = +{ + arm_9e_rtx_costs, + NULL, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + true /* Prefer LDRD/STRD. */ }; /* Branches can be dual-issued on Cortex-A5, so conditional execution is @@ -934,7 +953,8 @@ const struct tune_params arm_cortex_a5_tune = 1, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, false, /* Prefer constant pool. 
*/ - arm_cortex_a5_branch_cost + arm_cortex_a5_branch_cost, + false /* Prefer LDRD/STRD. */ }; const struct tune_params arm_cortex_a9_tune = @@ -945,7 +965,8 @@ const struct tune_params arm_cortex_a9_tune = 5, /* Max cond insns. */ ARM_PREFETCH_BENEFICIAL(4,32,32), false, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer LDRD/STRD. */ }; const struct tune_params arm_fa726te_tune = @@ -956,7 +977,8 @@ const struct tune_params arm_fa726te_tune = 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer LDRD/STRD. */ };