On Fri, Jun 3, 2016 at 3:51 AM, Wilco Dijkstra <wilco.dijks...@arm.com> wrote:
> Increase loop alignment on Cortex cores to 8 and set function alignment to 
> 16.  This makes things consistent across big.LITTLE cores, improves 
> performance of benchmarks with tight loops and reduces performance variations 
> due to small changes in code layout. It looks like almost all AArch64 cores agree 
> on alignment of 16 for function, and 8 for loops and branches, so we should 
> change -mcpu=generic as well if there is no disagreement - feedback welcome.

This actually might be better for ThunderX than the current set of
values for ThunderX.  I have tried 16 alignment for functions to see
if it is better but it should not hurt ThunderX that much as we have a
128 byte cache line anyways.

Thanks,
Andrew


>
> OK for commit?
>
> ChangeLog:
>
> 2016-05-03  Wilco Dijkstra  <wdijk...@arm.com>
>
>         * gcc/config/aarch64/aarch64.c (cortexa53_tunings):
>         Increase loop alignment to 8.  Set function alignment to 16.
>         (cortexa35_tunings): Likewise.
>         (cortexa57_tunings): Increase loop alignment to 8.
>         (cortexa72_tunings): Likewise.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 12e5017a6d4b0ab15dcf932014980fdbd1a598ee..6ea10a187a1f895a399515b8cd0da0be63be827a 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -424,9 +424,9 @@ static const struct tune_params cortexa35_tunings =
>    1, /* issue_rate  */
>    (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
>     | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
> -  8,   /* function_align.  */
> +  16,  /* function_align.  */
>    8,   /* jump_align.  */
> -  4,   /* loop_align.  */
> +  8,   /* loop_align.  */
>    2,   /* int_reassoc_width.  */
>    4,   /* fp_reassoc_width.  */
>    1,   /* vec_reassoc_width.  */
> @@ -449,9 +449,9 @@ static const struct tune_params cortexa53_tunings =
>    2, /* issue_rate  */
>    (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
>     | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
> -  8,   /* function_align.  */
> +  16,  /* function_align.  */
>    8,   /* jump_align.  */
> -  4,   /* loop_align.  */
> +  8,   /* loop_align.  */
>    2,   /* int_reassoc_width.  */
>    4,   /* fp_reassoc_width.  */
>    1,   /* vec_reassoc_width.  */
> @@ -476,7 +476,7 @@ static const struct tune_params cortexa57_tunings =
>     | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
>    16,  /* function_align.  */
>    8,   /* jump_align.  */
> -  4,   /* loop_align.  */
> +  8,   /* loop_align.  */
>    2,   /* int_reassoc_width.  */
>    4,   /* fp_reassoc_width.  */
>    1,   /* vec_reassoc_width.  */
> @@ -502,7 +502,7 @@ static const struct tune_params cortexa72_tunings =
>     | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
>    16,  /* function_align.  */
>    8,   /* jump_align.  */
> -  4,   /* loop_align.  */
> +  8,   /* loop_align.  */
>    2,   /* int_reassoc_width.  */
>    4,   /* fp_reassoc_width.  */
>    1,   /* vec_reassoc_width.  */
>

Reply via email to