Hi,

This patch wires up our new target hook for AArch64, which means we can bring back the two previously failing SRA tests (churn :( ). For now, I've just reused the old values we had for MOVE_RATIO. We should refactor that, as the same values are now used in two places (more churn :( ).
Bootstrapped on AArch64 with no issues and benchmarked with no discernible impact. OK for trunk? Thanks, James --- gcc/ 2014-08-20 James Greenhalgh <james.greenha...@arm.com> * config/aarch64/aarch64.c (aarch64_expand_movmem): Refactor old move costs. (aarch64_default_max_total_scalarization_size): New. (TARGET_DEFAULT_MAX_TOTAL_SCALARIZATION_SIZE): Likewise. * config/aarch64/aarch64.h (AARCH64_MOVE_RATIO): New. (MOVE_RATIO): Use it. gcc/testsuite/ 2014-08-20 James Greenhalgh <james.greenha...@arm.com> * gcc.dg/tree-ssa/pr42585.c: Bring back for AArch64. * gcc.dg/tree-ssa/sra-12.c: Likewise.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index c3c871e..d608717 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -9725,7 +9725,7 @@ aarch64_expand_movmem (rtx *operands) /* When optimizing for size, give a better estimate of the length of a memcpy call, but use the default otherwise. */ - unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2; + unsigned int max_instructions = AARCH64_MOVE_RATIO (speed_p); /* We can't do anything smart if the amount to copy is not constant. */ if (!CONST_INT_P (operands[2])) @@ -9826,6 +9826,14 @@ aarch64_expand_movmem (rtx *operands) return true; } +/* Implement TARGET_DEFAULT_MAX_TOTAL_SCALARIZATION_SIZE. */ + +static unsigned int +aarch64_default_max_total_scalarization_size (bool size_p) +{ + return AARCH64_MOVE_RATIO (!size_p); +} + #undef TARGET_ADDRESS_COST #define TARGET_ADDRESS_COST aarch64_address_cost @@ -9949,6 +9957,10 @@ aarch64_expand_movmem (rtx *operands) #undef TARGET_MANGLE_TYPE #define TARGET_MANGLE_TYPE aarch64_mangle_type +#undef TARGET_DEFAULT_MAX_TOTAL_SCALARIZATION_SIZE +#define TARGET_DEFAULT_MAX_TOTAL_SCALARIZATION_SIZE \ + aarch64_default_max_total_scalarization_size + #undef TARGET_MEMORY_MOVE_COST #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index db950da..5401061 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -681,6 +681,8 @@ do { \ /* The base cost overhead of a memcpy call, for MOVE_RATIO and friends. */ #define AARCH64_CALL_RATIO 8 +#define AARCH64_MOVE_RATIO(speed) (((speed) ? 15 : AARCH64_CALL_RATIO) / 2) + /* MOVE_RATIO dictates when we will use the move_by_pieces infrastructure. move_by_pieces will continually copy the largest safe chunks. So a 7-byte copy is a 4-byte + 2-byte + byte copy. This proves inefficient @@ -688,7 +690,7 @@ do { \ standard name to implement the copy. 
This logic does not apply when targeting -mstrict-align, so keep a sensible default in that case. */ #define MOVE_RATIO(speed) \ - (!STRICT_ALIGNMENT ? 2 : (((speed) ? 15 : AARCH64_CALL_RATIO) / 2)) + (!STRICT_ALIGNMENT ? 2 : AARCH64_MOVE_RATIO (speed)) /* For CLEAR_RATIO, when optimizing for size, give a better estimate of the length of a memset call, but use the default otherwise. */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr42585.c b/gcc/testsuite/gcc.dg/tree-ssa/pr42585.c index 07f575d..a970c85 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr42585.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr42585.c @@ -35,6 +35,6 @@ Cyc_string_ungetc (int ignore, struct _fat_ptr *sptr) /* Whether the structs are totally scalarized or not depends on the MOVE_RATIO macro definition in the back end. The scalarization will not take place when using small values for MOVE_RATIO. */ -/* { dg-final { scan-tree-dump-times "struct _fat_ptr _ans" 0 "optimized" { target { ! "aarch64*-*-* arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ -/* { dg-final { scan-tree-dump-times "struct _fat_ptr _T2" 0 "optimized" { target { ! "aarch64*-*-* arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ +/* { dg-final { scan-tree-dump-times "struct _fat_ptr _ans" 0 "optimized" { target { ! "arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ +/* { dg-final { scan-tree-dump-times "struct _fat_ptr _T2" 0 "optimized" { target { ! "arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ /* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c index 45aa963..59e5e6a 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c @@ -21,5 +21,5 @@ int foo (struct S *p) *p = l; } -/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" { target { ! 
"aarch64*-*-* avr*-*-* nds32*-*-*" } } } } */ +/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" { target { ! "avr*-*-* nds32*-*-*" } } } } */ /* { dg-final { cleanup-tree-dump "release_ssa" } } */