On 26/07/12 11:21, Bin Cheng wrote:
> Hi,
> This patch defines LOGICAL_OP_NON_SHORT_CIRCUIT in the arm back-end by calling
> a new hook function ("logical_op_non_short_circuit") in the tune_params
> structure. In most cases the value of the macro is the same as the default
> version in fold-const.c, but it is "FALSE" (preferring short circuit) when
> optimizing for size on armv6-m processors. This gives a ~0.2% code size
> improvement on the CSiBE benchmark for cortex-m0.
>  
> Tuning for other ARM processors could follow later.
> 
> No regressions were introduced. Is it OK?
> Thanks
> 

This all looks way too complex.  It shouldn't be necessary to
write a whole load of per-tune method calls to deal with this.  What we
need is a simple parameter in the tune table.

So LOGICAL_OP_NON_SHORT_CIRCUIT should effectively be:

    if (OPTIMIZE_SIZE)
        return ANSWER_BASED_ON_ISA;
    else
        return tune->log_op_non_short_circuit[TARGET_ARM ? 0 : 1];

Where tune->log_op_non_short_circuit[2] is defined for each tuning
table, to cover ARM and Thumb states.

ANSWER_BASED_ON_ISA will do the right thing for Thumb1, Thumb2 and ARM,
depending on which leads to the smallest code.

R.

> 2012-07-26  Bin Cheng  <bin.ch...@arm.com>
> 
>       * config/arm/arm-cores.def (cortex-m1, cortex-m0, cortex-m0plus):
>       Use v6m.
>       * config/arm/arm-protos.h (tune_params): Add
>       logical_op_non_short_circuit hook.
>       * config/arm/arm.c (arm_default_logical_op_non_short_circuit)
>       (arm_v6m_logical_op_non_short_circuit): New functions.
>       (arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune, arm_xscale_tune)
>       (arm_9e_tune, arm_v6t2_tune, arm_cortex_tune, arm_cortex_a15_tune)
>       (arm_cortex_a5_tune, arm_cortex_a9_tune, arm_fa726te_tune): Set the
>       field logical_op_non_short_circuit to
>       arm_default_logical_op_non_short_circuit.
>       (arm_v6m_tune): New tune_params struct.
>       * config/arm/arm.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Use the hook
>       logical_op_non_short_circuit from current_tune structure.
> 
> 
> short-circuit-20120726.txt
> 
> 
> Index: gcc/config/arm/arm.c
> ===================================================================
> --- gcc/config/arm/arm.c      (revision 189835)
> +++ gcc/config/arm/arm.c      (working copy)
> @@ -265,6 +265,9 @@
>  static int arm_default_branch_cost (bool, bool);
>  static int arm_cortex_a5_branch_cost (bool, bool);
>  
> +static bool arm_default_logical_op_non_short_circuit (void);
> +static bool arm_v6m_logical_op_non_short_circuit (void);
> +
>  static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
>                                            const unsigned char *sel);
>  
> @@ -876,7 +879,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    true,                                              /* Prefer constant 
> pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,                                     /* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  const struct tune_params arm_fastmul_tune =
> @@ -888,7 +892,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    true,                                              /* Prefer constant 
> pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,                                     /* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  /* StrongARM has early execution of branches, so a sequence that is worth
> @@ -903,7 +908,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    true,                                              /* Prefer constant 
> pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,                                     /* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  const struct tune_params arm_xscale_tune =
> @@ -915,7 +921,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    true,                                              /* Prefer constant 
> pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,                                     /* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  const struct tune_params arm_9e_tune =
> @@ -927,7 +934,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    true,                                              /* Prefer constant 
> pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,                                     /* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  const struct tune_params arm_v6t2_tune =
> @@ -939,7 +947,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    false,                                     /* Prefer constant pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,                                     /* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
> @@ -952,7 +961,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    false,                                     /* Prefer constant pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,                                     /* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  const struct tune_params arm_cortex_a15_tune =
> @@ -964,7 +974,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    false,                                     /* Prefer constant pool.  */
>    arm_default_branch_cost,
> -  true                                          /* Prefer LDRD/STRD.  */
> +  true,                                              /* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  /* Branches can be dual-issued on Cortex-A5, so conditional execution is
> @@ -979,7 +990,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    false,                                     /* Prefer constant pool.  */
>    arm_cortex_a5_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,                                     /* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  const struct tune_params arm_cortex_a9_tune =
> @@ -991,9 +1003,25 @@
>    ARM_PREFETCH_BENEFICIAL(4,32,32),
>    false,                                     /* Prefer constant pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,                                     /* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
> +/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
> +   arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
> +const struct tune_params arm_v6m_tune =
> +{
> +  arm_9e_rtx_costs,
> +  NULL,
> +  1,                                         /* Constant limit.  */
> +  5,                                         /* Max cond insns.  */
> +  ARM_PREFETCH_NOT_BENEFICIAL,
> +  false,                                     /* Prefer constant pool.  */
> +  arm_default_branch_cost,
> +  false,                                     /* Prefer LDRD/STRD.  */
> +  arm_v6m_logical_op_non_short_circuit,
> +};
> +
>  const struct tune_params arm_fa726te_tune =
>  {
>    arm_9e_rtx_costs,
> @@ -1003,7 +1031,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    true,                                              /* Prefer constant 
> pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,                                     /* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  
> @@ -8637,7 +8666,24 @@
>  
>    return cost;
>  }
> + 
> +static bool
> +arm_default_logical_op_non_short_circuit (void)
> +{
> +  return (BRANCH_COST (optimize_function_for_speed_p (cfun),
> +                    false) >= 2);
> +}
>  
> +static bool
> +arm_v6m_logical_op_non_short_circuit (void)
> +{
> +  /* Prefer short circuit operation on armv6-m when optimizing for size.  */
> +  if (optimize_size)
> +    return false;
> +
> +  return arm_default_logical_op_non_short_circuit ();
> +}
> +
>  static int
>  arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
>  {
> Index: gcc/config/arm/arm.h
> ===================================================================
> --- gcc/config/arm/arm.h      (revision 189835)
> +++ gcc/config/arm/arm.h      (working copy)
> @@ -1994,10 +1994,14 @@
>     || (X) == arg_pointer_rtx)
>  
>  /* Try to generate sequences that don't involve branches, we can then use
> -   conditional instructions */
> +   conditional instructions.  */
>  #define BRANCH_COST(speed_p, predictable_p) \
>    (current_tune->branch_cost (speed_p, predictable_p))
>  
> +/* False if short circuit operation is preferred.  */
> +#define LOGICAL_OP_NON_SHORT_CIRCUIT                         \
> +  (current_tune->logical_op_non_short_circuit ())
> +
>  
>  /* Position Independent Code.  */
>  /* We decide which register to use based on the compilation options and
> Index: gcc/config/arm/arm-cores.def
> ===================================================================
> --- gcc/config/arm/arm-cores.def      (revision 189835)
> +++ gcc/config/arm/arm-cores.def      (working copy)
> @@ -135,6 +135,6 @@
>  ARM_CORE("cortex-r5",          cortexr5,     7R,                             
>  FL_LDSCHED | FL_ARM_DIV, cortex)
>  ARM_CORE("cortex-m4",          cortexm4,     7EM,                            
>  FL_LDSCHED, cortex)
>  ARM_CORE("cortex-m3",          cortexm3,     7M,                             
>  FL_LDSCHED, cortex)
> -ARM_CORE("cortex-m1",          cortexm1,     6M,                             
>  FL_LDSCHED, cortex)
> -ARM_CORE("cortex-m0",          cortexm0,     6M,                             
>  FL_LDSCHED, cortex)
> -ARM_CORE("cortex-m0plus", cortexm0plus,      6M,                             
>  FL_LDSCHED, cortex)
> +ARM_CORE("cortex-m1",          cortexm1,     6M,                             
>  FL_LDSCHED, v6m)
> +ARM_CORE("cortex-m0",          cortexm0,     6M,                             
>  FL_LDSCHED, v6m)
> +ARM_CORE("cortex-m0plus", cortexm0plus,      6M,                             
>  FL_LDSCHED, v6m)
> Index: gcc/config/arm/arm-protos.h
> ===================================================================
> --- gcc/config/arm/arm-protos.h       (revision 189835)
> +++ gcc/config/arm/arm-protos.h       (working copy)
> @@ -240,6 +240,7 @@
>    int (*branch_cost) (bool, bool);
>    /* Prefer STRD/LDRD instructions over PUSH/POP/LDM/STM.  */
>    bool prefer_ldrd_strd;
> +  bool (*logical_op_non_short_circuit) (void);
>  };
>  
>  extern const struct tune_params *current_tune;
> 




Reply via email to