This adds a penalty of 4 to the post-reload branch costs.
Purpose is to reduce the number of out-of-line blocks like in

    unsigned long variant5 (unsigned in)
    {
        unsigned long out = 0;

        if (in & (1 << 0))  out |= 0xful << (4*0);
        if (in & (1 << 1))  out |= 0xful << (4*1);
        if (in & (1 << 2))  out |= 0xful << (4*2);
        if (in & (1 << 3))  out |= 0xful << (4*3);

        return out;
    }

Without the patch, the code is:

    variant5:
        mov r18,r24       ;  67  movqi_insn/1    [length = 1]
        sbrs r24,0        ;  10  *sbrx_branchhi  [length = 2]
        rjmp .L6
        ldi r22,lo8(15)   ;  5   *movsi/5        [length = 4]
        ldi r23,0
        ldi r24,0
        ldi r25,0
    .L2:
        <some_code_and_epilogue>
    .L6:
        ldi r22,0         ;  4   *movsi/2        [length = 3]
        ldi r23,0
        movw r24,r22
        rjmp .L2          ;  74  jump            [length = 1]

and with the patch it reads:

    variant5:
        mov r18,r24       ;  67  movqi_insn/1    [length = 1]
        ldi r22,lo8(15)   ;  5   *movsi/5        [length = 4]
        ldi r23,0
        ldi r24,0
        ldi r25,0
        sbrc r18,0        ;  10  *sbrx_branchhi  [length = 2]
        rjmp .L2
        ldi r22,0         ;  4   *movsi/2        [length = 3]
        ldi r23,0
        movw r24,r22
    .L2:
        <some_code_and_epilogue>

Using the fall-through saves insn 74.

The main blocker against increasing the default branch costs in general is do_store_flag, which is a heap of assumptions that does not consult rtx_costs and which gives the best results with the old default of 0; that default is left unchanged.

Tested without regressions. Ok for trunk?

Johann

	* config/avr/avr.h (BRANCH_COST) [reload_completed]: Increase by 4.
Index: config/avr/avr.h
===================================================================
--- config/avr/avr.h	(revision 244001)
+++ config/avr/avr.h	(working copy)
@@ -360,7 +360,12 @@ typedef struct avr_args
     }                                           \
   } while (0)
 
-#define BRANCH_COST(speed_p, predictable_p) avr_branch_cost
+/* We increase branch costs after reload in order to keep basic-block
+   reordering from introducing out-of-line jumps and to prefer fall-through
+   edges instead.  The default branch costs are 0, mainly because otherwise
+   do_store_flag might come up with bloated code.  */
+#define BRANCH_COST(speed_p, predictable_p)     \
+  (avr_branch_cost + (reload_completed ? 4 : 0))
 
 #define SLOW_BYTE_ACCESS 0
 
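For review, a minimal stand-alone sketch of how the new definition evaluates
before and after reload.  This is not part of the patch; the two variables are
hypothetical stand-ins for GCC's avr_branch_cost variable and reload_completed
flag, with avr_branch_cost at its assumed default of 0:

    #include <stdio.h>

    static int avr_branch_cost = 0;  /* stand-in, default branch cost */
    static int reload_completed;     /* stand-in, set once register
                                        allocation is done */

    #define BRANCH_COST(speed_p, predictable_p) \
      (avr_branch_cost + (reload_completed ? 4 : 0))

    int main (void)
    {
      reload_completed = 0;
      printf ("before reload: %d\n", BRANCH_COST (0, 0));  /* prints 0 */

      reload_completed = 1;
      printf ("after reload:  %d\n", BRANCH_COST (0, 0));  /* prints 4 */

      return 0;
    }

So passes that run after reload, such as basic-block reordering, see branches
as more expensive and prefer the fall-through layout shown above, while
do_store_flag, which runs before reload, still sees the old cost of 0.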