Hi,
The CE pass has been adapted to work with the probability of then/else
branches. Now the transformation is done only when it's profitable.
The problem is that the change affects both performance and size, causing
size regressions in many cases (especially in C libraries like Newlib).
So this patch relaxes the probability condition when we are optimizing for
size.
Below is an example from Newlib:
/* Local prototypes: the test case declares the three library routines it
   calls itself rather than including any header.  */
unsigned int strlen (const char *);
void * realloc (void * __r, unsigned int __size) ;
void * memcpy (void *, const void *, unsigned int);
/* Append the string STR, including its terminating NUL, to the end of the
   buffer *ARGZ, whose current length is *ARGZ_LEN.

   Returns 0 when STR is a null pointer (nothing is done) or when the
   append succeeds, and 12 when realloc returns a null pointer
   (presumably ENOMEM -- confirm against the target's errno values).  */
int argz_add(char **argz , unsigned int *argz_len , const char *str)
{
int len_to_add = 0;
/* Remember the old length: it is the offset at which STR gets copied.  */
unsigned int last = *argz_len;
/* A null STR is treated as a successful no-op.  */
if (str == ((void *)0))
return 0;
/* +1 so the terminating NUL is copied as well.  */
len_to_add = strlen(str) + 1;
/* NOTE(review): the length is updated before the reallocation is known to
   have succeeded, so it stays enlarged on the failure path below.  */
*argz_len += len_to_add;
/* NOTE(review): the realloc result is stored straight into *argz, so on
   failure the caller's pointer to the old buffer is overwritten with a
   null pointer.  */
if(!(*argz = (char *)realloc(*argz, *argz_len)))
return 12;
/* Copy STR (with its NUL) to the old end of the buffer.  */
memcpy(*argz + last, str, len_to_add);
return 0;
}
The assembly generated with -Os for cortex-m0 looks like this:
argz_add:
push {r0, r1, r2, r4, r5, r6, r7, lr}
mov r6, r0
mov r7, r1
mov r4, r2
ldr r5, [r1]
beq .L3
mov r0, r2
bl strlen
add r0, r0, #1
add r1, r0, r5
str r0, [sp, #4]
str r1, [r7]
ldr r0, [r6]
bl realloc
mov r3, #12
str r0, [r6]
cmp r0, #0
beq .L2
add r0, r0, r5
mov r1, r4
ldr r2, [sp, #4]
bl memcpy
mov r3, #0
b .L2
.L3:
mov r3, r2
.L2:
mov r0, r3
Here, the branch/mov instructions around .L3 can be CEed with this patch.
During this work I observed that passes before combine might interfere with
the CE pass, so this patch is enabled for ce2/ce3, after the combine pass.
It has been tested on x86/thumb2 for both normal and -Os builds. Is it OK for trunk?
2013-03-25 Bin Cheng <bin.ch...@arm.com>
* ifcvt.c (ifcvt_after_combine): New static variable.
(cheap_bb_rtx_cost_p): Set scale to REG_BR_PROB_BASE when optimizing
for size.
(rest_of_handle_if_conversion, rest_of_handle_if_after_combine):
Clear/set the variable ifcvt_after_combine.
Index: gcc/ifcvt.c
===================================================================
--- gcc/ifcvt.c (revision 197029)
+++ gcc/ifcvt.c (working copy)
@@ -67,6 +67,9 @@
#define NULL_BLOCK ((basic_block) NULL)
+/* TRUE if after combine pass. */
+static bool ifcvt_after_combine;
+
/* # of IF-THEN or IF-THEN-ELSE blocks we looked at */
static int num_possible_if_blocks;
@@ -144,8 +147,14 @@ cheap_bb_rtx_cost_p (const_basic_block bb, int sca
/* Our branch probability/scaling factors are just estimates and don't
account for cases where we can get speculation for free and other
secondary benefits. So we fudge the scale factor to make speculating
- appear a little more profitable. */
+ appear a little more profitable when optimizing for performance. */
scale += REG_BR_PROB_BASE / 8;
+
+ /* Set the scale to REG_BR_PROB_BASE to be more aggressive when
+ optimizing for size and after combine pass. */
+ if (!optimize_function_for_speed_p (cfun) && ifcvt_after_combine)
+ scale = REG_BR_PROB_BASE;
+
max_cost *= scale;
while (1)
@@ -4445,6 +4454,7 @@ gate_handle_if_conversion (void)
static unsigned int
rest_of_handle_if_conversion (void)
{
+ ifcvt_after_combine = false;
if (flag_if_conversion)
{
if (dump_file)
@@ -4494,6 +4504,7 @@ gate_handle_if_after_combine (void)
static unsigned int
rest_of_handle_if_after_combine (void)
{
+ ifcvt_after_combine = true;
if_convert ();
return 0;
}