Hi all,

This is the aarch64 implementation of the macro fusion hook, used to fuse mov+movk instructions together.
A new bitmask field, fuseable_ops, is declared in the tuning struct (struct tune_params); as we add more fuseable ops in the future, we will fill in more bits in the fuseable_ops field.
Bootstrapped and tested on aarch64-none-linux-gnu. Ok for trunk?

2014-11-11  Kyrylo Tkachov  <kyrylo.tkac...@arm.com>

    * config/aarch64/aarch64-protos.h (struct tune_params): Add fuseable_ops field.
    * config/aarch64/aarch64.c (generic_tunings): Specify fuseable_ops.
    (cortexa53_tunings): Likewise.
    (cortexa57_tunings): Likewise.
    (thunderx_tunings): Likewise.
    (aarch64_macro_fusion_p): New function.
    (aarch_macro_fusion_pair_p): Likewise.
    (TARGET_SCHED_MACRO_FUSION_P): Define.
    (TARGET_SCHED_MACRO_FUSION_PAIR_P): Likewise.
    (AARCH64_FUSE_MOV_MOVK): Likewise.
    (AARCH64_FUSE_NOTHING): Likewise.
commit 3181b0988eed091c8b1ead7a6381c6f9aee7774e Author: Kyrylo Tkachov <kyrylo.tkac...@arm.com> Date: Tue Oct 21 10:36:48 2014 +0100 [AArch64] Implement TARGET_MACRO_FUSION diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 810644c..d3d295d 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -170,6 +170,7 @@ struct tune_params const struct cpu_vector_cost *const vec_costs; const int memmov_cost; const int issue_rate; + const unsigned int fuseable_ops; }; HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 9aeac7c..96f6c47 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -299,6 +299,9 @@ static const struct cpu_vector_cost cortexa57_vector_cost = NAMED_PARAM (cond_not_taken_branch_cost, 1) }; +#define AARCH64_FUSE_NOTHING (0) +#define AARCH64_FUSE_MOV_MOVK (1 << 0) + #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 __extension__ #endif @@ -309,7 +312,8 @@ static const struct tune_params generic_tunings = &generic_regmove_cost, &generic_vector_cost, NAMED_PARAM (memmov_cost, 4), - NAMED_PARAM (issue_rate, 2) + NAMED_PARAM (issue_rate, 2), + NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING) }; static const struct tune_params cortexa53_tunings = @@ -319,7 +323,8 @@ static const struct tune_params cortexa53_tunings = &cortexa53_regmove_cost, &generic_vector_cost, NAMED_PARAM (memmov_cost, 4), - NAMED_PARAM (issue_rate, 2) + NAMED_PARAM (issue_rate, 2), + NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK) }; static const struct tune_params cortexa57_tunings = @@ -329,7 +334,8 @@ static const struct tune_params cortexa57_tunings = &cortexa57_regmove_cost, &cortexa57_vector_cost, NAMED_PARAM (memmov_cost, 4), - NAMED_PARAM (issue_rate, 3) + NAMED_PARAM (issue_rate, 3), + NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK) }; static const struct tune_params 
thunderx_tunings = @@ -339,7 +345,8 @@ static const struct tune_params thunderx_tunings = &thunderx_regmove_cost, &generic_vector_cost, NAMED_PARAM (memmov_cost, 6), - NAMED_PARAM (issue_rate, 2) + NAMED_PARAM (issue_rate, 2), + NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING) }; /* A processor implementing AArch64. */ @@ -10017,6 +10024,48 @@ aarch64_use_by_pieces_infrastructure_p (unsigned int size, return default_use_by_pieces_infrastructure_p (size, align, op, speed_p); } +static bool +aarch64_macro_fusion_p (void) +{ + return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING; +} + +static bool +aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) +{ + rtx set_dest; + rtx prev_set = single_set (prev); + rtx curr_set = single_set (curr); + + if (!prev_set + || !curr_set) + return false; + + if (any_condjump_p (curr)) + return false; + + if (!aarch64_macro_fusion_p ()) + return false; + + if (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK) + { + /* We are trying to fuse + mov imm / movk imm + instructions as a group that gets scheduled together. */ + + set_dest = SET_DEST (curr_set); + + return GET_CODE (set_dest) == ZERO_EXTRACT + && CONST_INT_P (SET_SRC (curr_set)) + && CONST_INT_P (SET_SRC (prev_set)) + && REG_P (XEXP (set_dest, 0)) + && REG_P (SET_DEST (prev_set)) + && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)); + } + + return false; +} + #undef TARGET_ADDRESS_COST #define TARGET_ADDRESS_COST aarch64_address_cost @@ -10273,6 +10322,12 @@ aarch64_use_by_pieces_infrastructure_p (unsigned int size, #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ aarch64_use_by_pieces_infrastructure_p +#undef TARGET_SCHED_MACRO_FUSION_P +#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p + +#undef TARGET_SCHED_MACRO_FUSION_PAIR_P +#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-aarch64.h"