Hi DJ, The patch below contains two minor enhancements for the RL78 prologue and epilogue code. The first is for when a large amount of local stack space needs to be allocated. Rather than generating a long sequence of SUB SP, #<byte> instructions, the patched code moves SP into AX, performs a single subtraction there, and then moves AX back into SP.
The second optimization is for the G10. It allows the BC, DE and HL registers to be pushed and popped directly, rather than via moving them into AX first. Tested with no regressions on an rl78-elf toolchain. OK to apply ? Cheers Nick 2015-01-26 Nick Clifton <ni...@redhat.com> * config/rl78/rl78.c (rl78_expand_prologue): In G10 mode push the BC, DE and HL registers directly, not via AX. When decrementing the stack pointer by a large amount, transfer SP into AX and perform the subtraction there. (rl78_expand_epilogue): Perform the inverse of the above enhancements. Index: gcc/config/rl78/rl78.c =================================================================== --- gcc/config/rl78/rl78.c (revision 220126) +++ gcc/config/rl78/rl78.c (working copy) @@ -1230,6 +1230,7 @@ { int i, fs; rtx sp = gen_rtx_REG (HImode, STACK_POINTER_REGNUM); + rtx ax = gen_rtx_REG (HImode, AX_REG); int rb = 0; if (rl78_is_naked_func ()) @@ -1253,15 +1254,19 @@ for (i = 0; i < 16; i++) if (cfun->machine->need_to_push [i]) { + int reg = i * 2; + if (TARGET_G10) { - if (i != 0) - emit_move_insn (gen_rtx_REG (HImode, AX_REG), gen_rtx_REG (HImode, i * 2)); - F (emit_insn (gen_push (gen_rtx_REG (HImode, AX_REG)))); + if (reg >= 8) + { + emit_move_insn (ax, gen_rtx_REG (HImode, reg)); + reg = AX_REG; + } } else { - int need_bank = i / 4; + int need_bank = i/4; if (need_bank != rb) { @@ -1268,9 +1273,9 @@ emit_insn (gen_sel_rb (GEN_INT (need_bank))); rb = need_bank; } - F (emit_insn (gen_push (gen_rtx_REG (HImode, i * 2)))); + } - } + F (emit_insn (gen_push (gen_rtx_REG (HImode, reg)))); } if (rb != 0) @@ -1280,23 +1285,41 @@ if (is_interrupt_func (cfun->decl) && cfun->machine->uses_es) { emit_insn (gen_movqi_from_es (gen_rtx_REG (QImode, A_REG))); - F (emit_insn (gen_push (gen_rtx_REG (HImode, AX_REG)))); + F (emit_insn (gen_push (ax))); } if (frame_pointer_needed) { - F (emit_move_insn (gen_rtx_REG (HImode, AX_REG), - gen_rtx_REG (HImode, STACK_POINTER_REGNUM))); - F (emit_move_insn (gen_rtx_REG 
(HImode, FRAME_POINTER_REGNUM), - gen_rtx_REG (HImode, AX_REG))); + F (emit_move_insn (ax, sp)); + F (emit_move_insn (gen_rtx_REG (HImode, FRAME_POINTER_REGNUM), ax)); } fs = cfun->machine->framesize_locals + cfun->machine->framesize_outgoing; - while (fs > 0) + if (fs > 0) { - int fs_byte = (fs > 254) ? 254 : fs; - F (emit_insn (gen_subhi3 (sp, sp, GEN_INT (fs_byte)))); - fs -= fs_byte; + /* If we need to subtract more than 254*3 then it is faster and + smaller to move SP into AX and perform the subtraction there. */ + if (fs > 254 * 3) + { + rtx insn; + + emit_move_insn (ax, sp); + emit_insn (gen_subhi3 (ax, ax, GEN_INT (fs))); + insn = emit_move_insn (sp, ax); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (SImode, sp, + gen_rtx_PLUS (HImode, sp, GEN_INT (-fs)))); + } + else + { + while (fs > 0) + { + int fs_byte = (fs > 254) ? 254 : fs; + + F (emit_insn (gen_subhi3 (sp, sp, GEN_INT (fs_byte)))); + fs -= fs_byte; + } + } } } @@ -1306,6 +1329,7 @@ { int i, fs; rtx sp = gen_rtx_REG (HImode, STACK_POINTER_REGNUM); + rtx ax = gen_rtx_REG (HImode, AX_REG); int rb = 0; if (rl78_is_naked_func ()) @@ -1313,20 +1337,27 @@ if (frame_pointer_needed) { - emit_move_insn (gen_rtx_REG (HImode, AX_REG), - gen_rtx_REG (HImode, FRAME_POINTER_REGNUM)); - emit_move_insn (gen_rtx_REG (HImode, STACK_POINTER_REGNUM), - gen_rtx_REG (HImode, AX_REG)); + emit_move_insn (ax, gen_rtx_REG (HImode, FRAME_POINTER_REGNUM)); + emit_move_insn (sp, ax); } else { fs = cfun->machine->framesize_locals + cfun->machine->framesize_outgoing; - while (fs > 0) + if (fs > 254 * 3) { - int fs_byte = (fs > 254) ? 254 : fs; + emit_move_insn (ax, sp); + emit_insn (gen_addhi3 (ax, ax, GEN_INT (fs))); + emit_move_insn (sp, ax); + } + else + { + while (fs > 0) + { + int fs_byte = (fs > 254) ? 
254 : fs; - emit_insn (gen_addhi3 (sp, sp, GEN_INT (fs_byte))); - fs -= fs_byte; + emit_insn (gen_addhi3 (sp, sp, GEN_INT (fs_byte))); + fs -= fs_byte; + } } } @@ -1343,11 +1374,11 @@ if (TARGET_G10) { - rtx ax = gen_rtx_REG (HImode, AX_REG); - - emit_insn (gen_pop (ax)); - if (i != 0) + if (i < 8) + emit_insn (gen_pop (dest)); + else { + emit_insn (gen_pop (ax)); emit_move_insn (dest, ax); /* Generate a USE of the pop'd register so that DCE will not eliminate the move. */ emit_insn (gen_use (dest));