On 26/07/12 11:21, Bin Cheng wrote: > Hi, > This patch defines LOGICAL_OP_NON_SHORT_CIRCUIT in the arm back-end by calling a > new hook function ("logical_op_non_short_circuit") in the tune_params structure. > For most cases the value of the macro is the same as the default version in > fold-const.c, while it is "FALSE" to prefer short circuit when optimizing > for size on armv6-m processors. This brings us ~0.2% code size improvement > for the CSiBE benchmark on cortex-m0. > > Tuning for other ARM processors could follow. > > No regression introduced, is it OK? > Thanks >
This all looks way too complex. It shouldn't be necessary to write a whole load of per-tune method calls to deal with this. What we need is a simple parameter in the tune table. So LOGICAL_OP_NON_SHORT_CIRCUIT should effectively be: if (OPTIMIZE_SIZE) return ANSWER_BASED_ON_ISA; else return tune->log_op_non_short_circuit[TARGET_ARM ? 0 : 1]; where tune->log_op_non_short_circuit[2] is defined for each tuning table, to cover the ARM and Thumb states (index 0 for ARM, index 1 for Thumb). ANSWER_BASED_ON_ISA will do the right thing for Thumb1, Thumb2 and ARM depending on which leads to the smallest code. R. > 2012-07-26 Bin Cheng <bin.ch...@arm.com> > > * config/arm/arm-cores.def (cortex-m1, cortex-m0, cortex-m0plus): > Use v6m. > * config/arm/arm-protos.h (tune_params): Add > logical_op_non_short_circuit hook. > * config/arm/arm.c (arm_default_logical_op_non_short_circuit) > (arm_v6m_logical_op_non_short_circuit): New functions. > (arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune, > arm_xscale_tune) > (arm_9e_tune, arm_v6t2_tune, arm_cortex_tune, arm_cortex_a15_tune) > (arm_cortex_a5_tune, arm_cortex_a9_tune, arm_fa726te_tune): Set the > field > logical_op_non_short_circuit to > arm_default_logical_op_non_short_circuit. > (arm_v6m_tune): New tune_params struct. > * config/arm/arm.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Use the hook > logical_op_non_short_circuit from current_tune structure. 
> > > short-circuit-20120726.txt > > > Index: gcc/config/arm/arm.c > =================================================================== > --- gcc/config/arm/arm.c (revision 189835) > +++ gcc/config/arm/arm.c (working copy) > @@ -265,6 +265,9 @@ > static int arm_default_branch_cost (bool, bool); > static int arm_cortex_a5_branch_cost (bool, bool); > > +static bool arm_default_logical_op_non_short_circuit (void); > +static bool arm_v6m_logical_op_non_short_circuit (void); > + > static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode, > const unsigned char *sel); > > @@ -876,7 +879,8 @@ > ARM_PREFETCH_NOT_BENEFICIAL, > true, /* Prefer constant > pool. */ > arm_default_branch_cost, > - false /* Prefer LDRD/STRD. */ > + false, /* Prefer LDRD/STRD. */ > + arm_default_logical_op_non_short_circuit, > }; > > const struct tune_params arm_fastmul_tune = > @@ -888,7 +892,8 @@ > ARM_PREFETCH_NOT_BENEFICIAL, > true, /* Prefer constant > pool. */ > arm_default_branch_cost, > - false /* Prefer LDRD/STRD. */ > + false, /* Prefer LDRD/STRD. */ > + arm_default_logical_op_non_short_circuit, > }; > > /* StrongARM has early execution of branches, so a sequence that is worth > @@ -903,7 +908,8 @@ > ARM_PREFETCH_NOT_BENEFICIAL, > true, /* Prefer constant > pool. */ > arm_default_branch_cost, > - false /* Prefer LDRD/STRD. */ > + false, /* Prefer LDRD/STRD. */ > + arm_default_logical_op_non_short_circuit, > }; > > const struct tune_params arm_xscale_tune = > @@ -915,7 +921,8 @@ > ARM_PREFETCH_NOT_BENEFICIAL, > true, /* Prefer constant > pool. */ > arm_default_branch_cost, > - false /* Prefer LDRD/STRD. */ > + false, /* Prefer LDRD/STRD. */ > + arm_default_logical_op_non_short_circuit, > }; > > const struct tune_params arm_9e_tune = > @@ -927,7 +934,8 @@ > ARM_PREFETCH_NOT_BENEFICIAL, > true, /* Prefer constant > pool. */ > arm_default_branch_cost, > - false /* Prefer LDRD/STRD. */ > + false, /* Prefer LDRD/STRD. 
*/ > + arm_default_logical_op_non_short_circuit, > }; > > const struct tune_params arm_v6t2_tune = > @@ -939,7 +947,8 @@ > ARM_PREFETCH_NOT_BENEFICIAL, > false, /* Prefer constant pool. */ > arm_default_branch_cost, > - false /* Prefer LDRD/STRD. */ > + false, /* Prefer LDRD/STRD. */ > + arm_default_logical_op_non_short_circuit, > }; > > /* Generic Cortex tuning. Use more specific tunings if appropriate. */ > @@ -952,7 +961,8 @@ > ARM_PREFETCH_NOT_BENEFICIAL, > false, /* Prefer constant pool. */ > arm_default_branch_cost, > - false /* Prefer LDRD/STRD. */ > + false, /* Prefer LDRD/STRD. */ > + arm_default_logical_op_non_short_circuit, > }; > > const struct tune_params arm_cortex_a15_tune = > @@ -964,7 +974,8 @@ > ARM_PREFETCH_NOT_BENEFICIAL, > false, /* Prefer constant pool. */ > arm_default_branch_cost, > - true /* Prefer LDRD/STRD. */ > + true, /* Prefer LDRD/STRD. */ > + arm_default_logical_op_non_short_circuit, > }; > > /* Branches can be dual-issued on Cortex-A5, so conditional execution is > @@ -979,7 +990,8 @@ > ARM_PREFETCH_NOT_BENEFICIAL, > false, /* Prefer constant pool. */ > arm_cortex_a5_branch_cost, > - false /* Prefer LDRD/STRD. */ > + false, /* Prefer LDRD/STRD. */ > + arm_default_logical_op_non_short_circuit, > }; > > const struct tune_params arm_cortex_a9_tune = > @@ -991,9 +1003,25 @@ > ARM_PREFETCH_BENEFICIAL(4,32,32), > false, /* Prefer constant pool. */ > arm_default_branch_cost, > - false /* Prefer LDRD/STRD. */ > + false, /* Prefer LDRD/STRD. */ > + arm_default_logical_op_non_short_circuit, > }; > > +/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than > + arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */ > +const struct tune_params arm_v6m_tune = > +{ > + arm_9e_rtx_costs, > + NULL, > + 1, /* Constant limit. */ > + 5, /* Max cond insns. */ > + ARM_PREFETCH_NOT_BENEFICIAL, > + false, /* Prefer constant pool. */ > + arm_default_branch_cost, > + false, /* Prefer LDRD/STRD. 
*/ > + arm_v6m_logical_op_non_short_circuit, > +}; > + > const struct tune_params arm_fa726te_tune = > { > arm_9e_rtx_costs, > @@ -1003,7 +1031,8 @@ > ARM_PREFETCH_NOT_BENEFICIAL, > true, /* Prefer constant > pool. */ > arm_default_branch_cost, > - false /* Prefer LDRD/STRD. */ > + false, /* Prefer LDRD/STRD. */ > + arm_default_logical_op_non_short_circuit, > }; > > > @@ -8637,7 +8666,24 @@ > > return cost; > } > + > +static bool > +arm_default_logical_op_non_short_circuit (void) > +{ > + return (BRANCH_COST (optimize_function_for_speed_p (cfun), > + false) >= 2); > +} > > +static bool > +arm_v6m_logical_op_non_short_circuit (void) > +{ > + /* Prefer short circuit operation on armv6-m when optimizing for size. */ > + if (optimize_size) > + return false; > + > + return arm_default_logical_op_non_short_circuit (); > +} > + > static int > arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED) > { > Index: gcc/config/arm/arm.h > =================================================================== > --- gcc/config/arm/arm.h (revision 189835) > +++ gcc/config/arm/arm.h (working copy) > @@ -1994,10 +1994,14 @@ > || (X) == arg_pointer_rtx) > > /* Try to generate sequences that don't involve branches, we can then use > - conditional instructions */ > + conditional instructions. */ > #define BRANCH_COST(speed_p, predictable_p) \ > (current_tune->branch_cost (speed_p, predictable_p)) > > +/* False if short circuit operation is preferred. */ > +#define LOGICAL_OP_NON_SHORT_CIRCUIT \ > + (current_tune->logical_op_non_short_circuit ()) > + > > /* Position Independent Code. 
*/ > /* We decide which register to use based on the compilation options and > Index: gcc/config/arm/arm-cores.def > =================================================================== > --- gcc/config/arm/arm-cores.def (revision 189835) > +++ gcc/config/arm/arm-cores.def (working copy) > @@ -135,6 +135,6 @@ > ARM_CORE("cortex-r5", cortexr5, 7R, > FL_LDSCHED | FL_ARM_DIV, cortex) > ARM_CORE("cortex-m4", cortexm4, 7EM, > FL_LDSCHED, cortex) > ARM_CORE("cortex-m3", cortexm3, 7M, > FL_LDSCHED, cortex) > -ARM_CORE("cortex-m1", cortexm1, 6M, > FL_LDSCHED, cortex) > -ARM_CORE("cortex-m0", cortexm0, 6M, > FL_LDSCHED, cortex) > -ARM_CORE("cortex-m0plus", cortexm0plus, 6M, > FL_LDSCHED, cortex) > +ARM_CORE("cortex-m1", cortexm1, 6M, > FL_LDSCHED, v6m) > +ARM_CORE("cortex-m0", cortexm0, 6M, > FL_LDSCHED, v6m) > +ARM_CORE("cortex-m0plus", cortexm0plus, 6M, > FL_LDSCHED, v6m) > Index: gcc/config/arm/arm-protos.h > =================================================================== > --- gcc/config/arm/arm-protos.h (revision 189835) > +++ gcc/config/arm/arm-protos.h (working copy) > @@ -240,6 +240,7 @@ > int (*branch_cost) (bool, bool); > /* Prefer STRD/LDRD instructions over PUSH/POP/LDM/STM. */ > bool prefer_ldrd_strd; > + bool (*logical_op_non_short_circuit) (void); > }; > > extern const struct tune_params *current_tune; >