Hi,
This patch defines LOGICAL_OP_NON_SHORT_CIRCUIT in the ARM back end by calling
a new hook function ("logical_op_non_short_circuit") in the tune_params structure.
In most cases the value of the macro is the same as the default version in
fold-const.c; it is "FALSE" (preferring short-circuit evaluation) only when
optimizing for size on ARMv6-M processors. This gives a ~0.2% code size
improvement on the CSiBE benchmark for Cortex-M0.
Tuning for other ARM processors could follow later.
No regressions were introduced. Is it OK?
Thanks
2012-07-26 Bin Cheng <[email protected]>
* config/arm/arm-cores.def (cortex-m1, cortex-m0, cortex-m0plus):
Use v6m.
* config/arm/arm-protos.h (tune_params): Add
logical_op_non_short_circuit hook.
* config/arm/arm.c (arm_default_logical_op_non_short_circuit)
(arm_v6m_logical_op_non_short_circuit): New functions.
(arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune)
(arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune)
(arm_cortex_a15_tune, arm_cortex_a5_tune, arm_cortex_a9_tune)
(arm_fa726te_tune): Set the field logical_op_non_short_circuit to
arm_default_logical_op_non_short_circuit.
(arm_v6m_tune): New tune_params struct.
* config/arm/arm.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Use the hook
logical_op_non_short_circuit from current_tune structure.
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c (revision 189835)
+++ gcc/config/arm/arm.c (working copy)
@@ -265,6 +265,9 @@
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
+static bool arm_default_logical_op_non_short_circuit (void);
+static bool arm_v6m_logical_op_non_short_circuit (void);
+
static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
const unsigned char *sel);
@@ -876,7 +879,8 @@
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant
pool. */
arm_default_branch_cost,
- false /* Prefer LDRD/STRD. */
+ false, /* Prefer LDRD/STRD. */
+ arm_default_logical_op_non_short_circuit,
};
const struct tune_params arm_fastmul_tune =
@@ -888,7 +892,8 @@
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant
pool. */
arm_default_branch_cost,
- false /* Prefer LDRD/STRD. */
+ false, /* Prefer LDRD/STRD. */
+ arm_default_logical_op_non_short_circuit,
};
/* StrongARM has early execution of branches, so a sequence that is worth
@@ -903,7 +908,8 @@
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant
pool. */
arm_default_branch_cost,
- false /* Prefer LDRD/STRD. */
+ false, /* Prefer LDRD/STRD. */
+ arm_default_logical_op_non_short_circuit,
};
const struct tune_params arm_xscale_tune =
@@ -915,7 +921,8 @@
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant
pool. */
arm_default_branch_cost,
- false /* Prefer LDRD/STRD. */
+ false, /* Prefer LDRD/STRD. */
+ arm_default_logical_op_non_short_circuit,
};
const struct tune_params arm_9e_tune =
@@ -927,7 +934,8 @@
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant
pool. */
arm_default_branch_cost,
- false /* Prefer LDRD/STRD. */
+ false, /* Prefer LDRD/STRD. */
+ arm_default_logical_op_non_short_circuit,
};
const struct tune_params arm_v6t2_tune =
@@ -939,7 +947,8 @@
ARM_PREFETCH_NOT_BENEFICIAL,
false, /* Prefer constant pool. */
arm_default_branch_cost,
- false /* Prefer LDRD/STRD. */
+ false, /* Prefer LDRD/STRD. */
+ arm_default_logical_op_non_short_circuit,
};
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
@@ -952,7 +961,8 @@
ARM_PREFETCH_NOT_BENEFICIAL,
false, /* Prefer constant pool. */
arm_default_branch_cost,
- false /* Prefer LDRD/STRD. */
+ false, /* Prefer LDRD/STRD. */
+ arm_default_logical_op_non_short_circuit,
};
const struct tune_params arm_cortex_a15_tune =
@@ -964,7 +974,8 @@
ARM_PREFETCH_NOT_BENEFICIAL,
false, /* Prefer constant pool. */
arm_default_branch_cost,
- true /* Prefer LDRD/STRD. */
+ true, /* Prefer LDRD/STRD. */
+ arm_default_logical_op_non_short_circuit,
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
@@ -979,7 +990,8 @@
ARM_PREFETCH_NOT_BENEFICIAL,
false, /* Prefer constant pool. */
arm_cortex_a5_branch_cost,
- false /* Prefer LDRD/STRD. */
+ false, /* Prefer LDRD/STRD. */
+ arm_default_logical_op_non_short_circuit,
};
const struct tune_params arm_cortex_a9_tune =
@@ -991,9 +1003,25 @@
ARM_PREFETCH_BENEFICIAL(4,32,32),
false, /* Prefer constant pool. */
arm_default_branch_cost,
- false /* Prefer LDRD/STRD. */
+ false, /* Prefer LDRD/STRD. */
+ arm_default_logical_op_non_short_circuit,
};
+/* Tuning parameters for the ARMv6-M cores cortex-m0, cortex-m1 and
+ cortex-m0plus. This is a copy of arm_cortex_tune, rather than
+ arm_v6t2_tune, whose last field selects
+ arm_v6m_logical_op_non_short_circuit instead of the default hook. */
+const struct tune_params arm_v6m_tune =
+{
+ arm_9e_rtx_costs,
+ NULL,
+ 1, /* Constant limit. */
+ 5, /* Max cond insns. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ false, /* Prefer constant pool. */
+ arm_default_branch_cost, /* Branch cost hook. */
+ false, /* Prefer LDRD/STRD. */
+ arm_v6m_logical_op_non_short_circuit, /* Logical op non short circuit hook. */
+};
+
const struct tune_params arm_fa726te_tune =
{
arm_9e_rtx_costs,
@@ -1003,7 +1031,8 @@
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant
pool. */
arm_default_branch_cost,
- false /* Prefer LDRD/STRD. */
+ false, /* Prefer LDRD/STRD. */
+ arm_default_logical_op_non_short_circuit,
};
@@ -8637,7 +8666,24 @@
return cost;
}
+/* Default logical_op_non_short_circuit hook. Return true when
+ BRANCH_COST, queried with the current function's optimize-for-speed
+ setting as speed_p and with predictable_p false, is at least 2. */
+static bool
+arm_default_logical_op_non_short_circuit (void)
+{
+ return (BRANCH_COST (optimize_function_for_speed_p (cfun),
+ false) >= 2);
+}
+static bool
+arm_v6m_logical_op_non_short_circuit (void)
+{
+ /* logical_op_non_short_circuit hook for armv6-m: prefer short circuit
+ operation (return false) when optimizing for size, otherwise defer to
+ the default hook. NOTE(review): this tests the global optimize_size
+ flag while the default hook uses optimize_function_for_speed_p (cfun);
+ confirm the global check is intended. */
+ if (optimize_size)
+ return false;
+
+ return arm_default_logical_op_non_short_circuit ();
+}
+
static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
Index: gcc/config/arm/arm.h
===================================================================
--- gcc/config/arm/arm.h (revision 189835)
+++ gcc/config/arm/arm.h (working copy)
@@ -1994,10 +1994,14 @@
|| (X) == arg_pointer_rtx)
/* Try to generate sequences that don't involve branches, we can then use
- conditional instructions */
+ conditional instructions. */
#define BRANCH_COST(speed_p, predictable_p) \
(current_tune->branch_cost (speed_p, predictable_p))
+/* True if logical operations should be evaluated without short circuit;
+ false if short circuit operation is preferred. The decision is made by
+ the logical_op_non_short_circuit hook of current_tune. */
+#define LOGICAL_OP_NON_SHORT_CIRCUIT \
+ (current_tune->logical_op_non_short_circuit ())
+
/* Position Independent Code. */
/* We decide which register to use based on the compilation options and
Index: gcc/config/arm/arm-cores.def
===================================================================
--- gcc/config/arm/arm-cores.def (revision 189835)
+++ gcc/config/arm/arm-cores.def (working copy)
@@ -135,6 +135,6 @@
ARM_CORE("cortex-r5", cortexr5, 7R,
FL_LDSCHED | FL_ARM_DIV, cortex)
ARM_CORE("cortex-m4", cortexm4, 7EM,
FL_LDSCHED, cortex)
ARM_CORE("cortex-m3", cortexm3, 7M,
FL_LDSCHED, cortex)
-ARM_CORE("cortex-m1", cortexm1, 6M,
FL_LDSCHED, cortex)
-ARM_CORE("cortex-m0", cortexm0, 6M,
FL_LDSCHED, cortex)
-ARM_CORE("cortex-m0plus", cortexm0plus, 6M,
FL_LDSCHED, cortex)
+ARM_CORE("cortex-m1", cortexm1, 6M,
FL_LDSCHED, v6m)
+ARM_CORE("cortex-m0", cortexm0, 6M,
FL_LDSCHED, v6m)
+ARM_CORE("cortex-m0plus", cortexm0plus, 6M,
FL_LDSCHED, v6m)
Index: gcc/config/arm/arm-protos.h
===================================================================
--- gcc/config/arm/arm-protos.h (revision 189835)
+++ gcc/config/arm/arm-protos.h (working copy)
@@ -240,6 +240,7 @@
int (*branch_cost) (bool, bool);
/* Prefer STRD/LDRD instructions over PUSH/POP/LDM/STM. */
bool prefer_ldrd_strd;
+ bool (*logical_op_non_short_circuit) (void);
};
extern const struct tune_params *current_tune;