Hi DJ,

  The patch below contains two minor enhancements for the RL78 prologue
  and epilogue code.  The first is for when a large amount of local
  stack space needs to be allocated.  Rather than generating a long
  sequence of SUB SP, #<byte> instructions, the patched code moves SP
  into AX, performs a single subtraction there, and then moves AX back
  into SP.

  The second optimization is for the G10.  It allows the BC, DE and HL
  registers to be pushed and popped directly, rather than via moving
  them into AX first.

  Tested with no regressions on an rl78-elf toolchain.

  OK to apply?

Cheers
  Nick

2015-01-26  Nick Clifton  <ni...@redhat.com>

        * config/rl78/rl78.c (rl78_expand_prologue): In G10 mode push the
        BC, DE and HL registers directly, not via AX.
        When decrementing the stack pointer by a large amount, transfer SP
        into AX and perform the subtraction there.
        (rl78_expand_epilogue): Perform the inverse of the above
        enhancements.

Index: gcc/config/rl78/rl78.c
===================================================================
--- gcc/config/rl78/rl78.c      (revision 220126)
+++ gcc/config/rl78/rl78.c      (working copy)
@@ -1230,6 +1230,7 @@
 {
   int i, fs;
   rtx sp = gen_rtx_REG (HImode, STACK_POINTER_REGNUM);
+  rtx ax = gen_rtx_REG (HImode, AX_REG);
   int rb = 0;
 
   if (rl78_is_naked_func ())
@@ -1253,15 +1254,19 @@
   for (i = 0; i < 16; i++)
     if (cfun->machine->need_to_push [i])
       {
+       int reg = i * 2;
+
        if (TARGET_G10)
          {
-           if (i != 0)
-             emit_move_insn (gen_rtx_REG (HImode, AX_REG), gen_rtx_REG 
(HImode, i * 2));
-           F (emit_insn (gen_push (gen_rtx_REG (HImode, AX_REG))));
+           if (reg >= 8)
+             {
+               emit_move_insn (ax, gen_rtx_REG (HImode, reg));
+               reg = AX_REG;
+             }
          }
        else
          {
-           int need_bank = i / 4;
+           int need_bank = i/4;
 
            if (need_bank != rb)
              {
@@ -1268,9 +1273,9 @@
                emit_insn (gen_sel_rb (GEN_INT (need_bank)));
                rb = need_bank;
              }
-           F (emit_insn (gen_push (gen_rtx_REG (HImode, i * 2))));
+         }
 
-         }
+       F (emit_insn (gen_push (gen_rtx_REG (HImode, reg))));
       }
 
   if (rb != 0)
@@ -1280,23 +1285,41 @@
   if (is_interrupt_func (cfun->decl) && cfun->machine->uses_es)
     {
       emit_insn (gen_movqi_from_es (gen_rtx_REG (QImode, A_REG)));
-      F (emit_insn (gen_push (gen_rtx_REG (HImode, AX_REG))));
+      F (emit_insn (gen_push (ax)));
     }
 
   if (frame_pointer_needed)
     {
-      F (emit_move_insn (gen_rtx_REG (HImode, AX_REG),
-                        gen_rtx_REG (HImode, STACK_POINTER_REGNUM)));
-      F (emit_move_insn (gen_rtx_REG (HImode, FRAME_POINTER_REGNUM),
-                        gen_rtx_REG (HImode, AX_REG)));
+      F (emit_move_insn (ax, sp));
+      F (emit_move_insn (gen_rtx_REG (HImode, FRAME_POINTER_REGNUM), ax));
     }
 
   fs = cfun->machine->framesize_locals + cfun->machine->framesize_outgoing;
-  while (fs > 0)
+  if (fs > 0)
     {
-      int fs_byte = (fs > 254) ? 254 : fs;
-      F (emit_insn (gen_subhi3 (sp, sp, GEN_INT (fs_byte))));
-      fs -= fs_byte;
+      /* If we need to subtract more than 254*3 then it is faster and
+        smaller to move SP into AX and perform the subtraction there.  */
+      if (fs > 254 * 3)
+       {
+         rtx insn;
+
+         emit_move_insn (ax, sp);
+         emit_insn (gen_subhi3 (ax, ax, GEN_INT (fs)));
+         insn = emit_move_insn (sp, ax);
+         add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+                       gen_rtx_SET (SImode, sp,
+                                    gen_rtx_PLUS (HImode, sp, GEN_INT (-fs))));
+       }
+      else
+       {
+         while (fs > 0)
+           {
+             int fs_byte = (fs > 254) ? 254 : fs;
+
+             F (emit_insn (gen_subhi3 (sp, sp, GEN_INT (fs_byte))));
+             fs -= fs_byte;
+           }
+       }
     }
 }
 
@@ -1306,6 +1329,7 @@
 {
   int i, fs;
   rtx sp = gen_rtx_REG (HImode, STACK_POINTER_REGNUM);
+  rtx ax = gen_rtx_REG (HImode, AX_REG);
   int rb = 0;
 
   if (rl78_is_naked_func ())
@@ -1313,20 +1337,27 @@
 
   if (frame_pointer_needed)
     {
-      emit_move_insn (gen_rtx_REG (HImode, AX_REG),
-                     gen_rtx_REG (HImode, FRAME_POINTER_REGNUM));
-      emit_move_insn (gen_rtx_REG (HImode, STACK_POINTER_REGNUM),
-                     gen_rtx_REG (HImode, AX_REG));
+      emit_move_insn (ax, gen_rtx_REG (HImode, FRAME_POINTER_REGNUM));
+      emit_move_insn (sp, ax);
     }
   else
     {
       fs = cfun->machine->framesize_locals + cfun->machine->framesize_outgoing;
-      while (fs > 0)
+      if (fs > 254 * 3)
        {
-         int fs_byte = (fs > 254) ? 254 : fs;
+         emit_move_insn (ax, sp);
+         emit_insn (gen_addhi3 (ax, ax, GEN_INT (fs)));
+         emit_move_insn (sp, ax);
+       }
+      else
+       {
+         while (fs > 0)
+           {
+             int fs_byte = (fs > 254) ? 254 : fs;
 
-         emit_insn (gen_addhi3 (sp, sp, GEN_INT (fs_byte)));
-         fs -= fs_byte;
+             emit_insn (gen_addhi3 (sp, sp, GEN_INT (fs_byte)));
+             fs -= fs_byte;
+           }
        }
     }
 
@@ -1343,11 +1374,11 @@
 
        if (TARGET_G10)
          {
-           rtx ax = gen_rtx_REG (HImode, AX_REG);
-
-           emit_insn (gen_pop (ax));
-           if (i != 0)
+           if (i < 8)
+             emit_insn (gen_pop (dest));
+           else
              {
+               emit_insn (gen_pop (ax));
                emit_move_insn (dest, ax);
                /* Generate a USE of the pop'd register so that DCE will not 
eliminate the move.  */
                emit_insn (gen_use (dest));

Reply via email to