This is a tentative patch to the PR. As Jakub explained, GCC does not support
configurations with FP = SP and DSE optimizer generates wrong code for AVR.

This patch implements Jakub's proposal to hack around by hiding the action of
setting/moving between SP and SP into UNSPEC[_VOLATILE]s.

All works fine with respect to code generation.
But what I cannot manage getting debug information right.

Without RTX_FRAME_RELATED_P at most insns, I just get one new dwarf-ICE:

dwarf2out_frame_debug_adjust_cfa[test2:dwarf2(233)]: pat = (set (reg/f:HI 28 
r28)
    (plus:HI (reg/f:HI 32 __SP_L__)
        (const_int -40 [0xffffffd8])))
dwf_regno (XEXP (src, 0)) = 32
cur_cfa->reg = 28
./gcc/testsuite/gcc.c-torture/execute/builtins/snprintf-chk.c: In function
'test2':
./gcc/testsuite/gcc.c-torture/execute/builtins/snprintf-chk.c:159:1: internal
compiler error: in dwarf2out_frame_debug_adjust_cfa, at dwarf2cfi.c:1098

However, if all insns that are involved in SP/FP manipulation get the
RTX_FRAME_RELATED_P, almost every test case that has -g crashes with this
dwarf-ICE.

I am stuck now and don't know how to proceed with this.

Many thanks, Richard, for your help with the CFA notes.
Unfortunately, adding the RTX_FRAME_RELATED_P markers shreds every...

Attached the patch as-is and a plain work-copy of
avr.c:avr_prologue_setup_frame

Johann

        PR other/50063
        * config/avr/avr.md (UNSPEC_READ_SP): New define_c_enum "unspec".
        (read_sp): New insn.
        (movhi_sp_r): Handle -1 (unknown IRQ state) and 2 (8-bit SP) in
        operand 2.
        * config/avr/avr.c (avr_prologue_setup_frame): Adjust prologue
        setup to use read_sp instead of vanilla move.
        Adjust REG_CFA notes to superseed unspec.
        (expand_epilogue):  Adjust epilogue setup to use read_sp instead
        of vanilla move.
        As function body might contain CLI or SEI: Use irq_state 0 (IRQ
        known to be off) only with TARGET_NO_INTERRUPTS. Never use
        irq_state 1 (IRQ known to be on) here.

static void
emit_push_byte (unsigned regno, bool frame_related_p)
{
  rtx mem, reg, insn;

  mem = gen_rtx_POST_DEC (HImode, stack_pointer_rtx);
  mem = gen_frame_mem (QImode, mem);
  reg = gen_rtx_REG (QImode, regno);

  insn = emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
  if (frame_related_p)
    RTX_FRAME_RELATED_P (insn) = 1;

  cfun->machine->stack_usage++;
}


/*  Helper for expand_prologue.  Emit a push of a SFR via tmp_reg.
    SFR is a MEM representing the memory location of the SFR.
    If CLR_P then clear the SFR after the push using zero_reg.  */

static void
emit_push_sfr (rtx sfr, bool frame_related_p, bool clr_p)
{
  rtx insn;
  
  gcc_assert (MEM_P (sfr));

  /* IN __tmp_reg__, IO(SFR) */
  insn = emit_move_insn (tmp_reg_rtx, sfr);
  if (frame_related_p)
    RTX_FRAME_RELATED_P (insn) = 1;
  
  /* PUSH __tmp_reg__ */
  emit_push_byte (TMP_REGNO, frame_related_p);

  if (clr_p)
    {
      /* OUT IO(SFR), __zero_reg__ */
      insn = emit_move_insn (sfr, const0_rtx);
      if (frame_related_p)
        RTX_FRAME_RELATED_P (insn) = 1;
    }
}

static void
avr_prologue_setup_frame (HOST_WIDE_INT size, HARD_REG_SET set)
{
  rtx insn;
  bool isr_p = cfun->machine->is_interrupt || cfun->machine->is_signal;
  int live_seq = sequent_regs_live ();

  bool minimize = (TARGET_CALL_PROLOGUES
                   && live_seq
                   && !isr_p
                   && !cfun->machine->is_OS_task
                   && !cfun->machine->is_OS_main);
  
  if (minimize
      && (frame_pointer_needed
          || avr_outgoing_args_size() > 8
          || (AVR_2_BYTE_PC && live_seq > 6)
          || live_seq > 7)) 
    {
      rtx pattern;
      int first_reg, reg, offset;

      emit_move_insn (gen_rtx_REG (HImode, REG_X), 
                      gen_int_mode (size, HImode));

      pattern = gen_call_prologue_saves (gen_int_mode (live_seq, HImode),
                                         gen_int_mode (live_seq+size, HImode));
      insn = emit_insn (pattern);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Describe the effect of the unspec_volatile call to prologue_saves.
         Note that this formulation assumes that add_reg_note pushes the
         notes to the front.  Thus we build them in the reverse order of
         how we want dwarf2out to process them.  */

      /* The function does always set frame_pointer_rtx, but whether that
         is going to be permanent in the function is frame_pointer_needed.  */

      add_reg_note (insn, REG_CFA_ADJUST_CFA,
                    gen_rtx_SET (VOIDmode, (frame_pointer_needed
                                            ? frame_pointer_rtx
                                            : stack_pointer_rtx),
                                 plus_constant (stack_pointer_rtx,
                                                -(size + live_seq))));

      /* Note that live_seq always contains r28+r29, but the other
         registers to be saved are all below 18.  */

      first_reg = 18 - (live_seq - 2);

      for (reg = 29, offset = -live_seq + 1;
           reg >= first_reg;
           reg = (reg == 28 ? 17 : reg - 1), ++offset)
        {
          rtx m, r;

          m = gen_rtx_MEM (QImode, plus_constant (stack_pointer_rtx, offset));
          r = gen_rtx_REG (QImode, reg);
          add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, m, r));
        }

      cfun->machine->stack_usage += size + live_seq;
    }
  else /* !minimize */
    {
      int reg;
      
      for (reg = 0; reg < 32; ++reg)
        if (TEST_HARD_REG_BIT (set, reg))
          emit_push_byte (reg, true);

      if (frame_pointer_needed
          && (!(cfun->machine->is_OS_task || cfun->machine->is_OS_main)))
        {
          /* Push frame pointer.  Always be consistent about the
             ordering of pushes -- epilogue_restores expects the
             register pair to be pushed low byte first.  */
          
          emit_push_byte (REG_Y, true);
          emit_push_byte (REG_Y + 1, true);
        }
          
      if (frame_pointer_needed
          && size == 0)
        {
          insn = emit_insn (gen_read_sp (frame_pointer_rtx, stack_pointer_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_ADJUST_CFA,
                        gen_rtx_SET (VOIDmode, frame_pointer_rtx,
                                     stack_pointer_rtx));
        }
      
      if (size != 0)
        {
          /*  Creating a frame can be done by direct manipulation of the
              stack or via the frame pointer. These two methods are:
                  fp =  sp
                  fp -= size
                  sp =  fp
              or
                  sp -= size
                  fp =  sp    (*)
              the optimum method depends on function type, stack and
              frame size.  To avoid a complex logic, both methods are
              tested and shortest is selected.

              There is also the case where SIZE != 0 and no frame pointer is
              needed; this can occur if ACCUMULATE_OUTGOING_ARGS is on.
              In that case, insn (*) is not needed in that case.
              We use the X register as scratch. This is save because in X
              is call-clobbered.
                 In an interrupt routine, the case of SIZE != 0 together with
              !frame_pointer_needed can only occur if the function is not a
              leaf function and thus X has already been saved.  */
              
          int irq_state = -1;
          rtx fp_plus_insns, fp, my_fp;

          gcc_assert (frame_pointer_needed
                      || !isr_p
                      || !current_function_is_leaf);
          
          fp = my_fp = (frame_pointer_needed
                        ? frame_pointer_rtx
                        : gen_rtx_REG (Pmode, REG_X));
          
          if (AVR_HAVE_8BIT_SP)
            {
              /* The high byte (r29) does not change:
                 Prefer SUBI (1 cycle) over ADIW (2 cycles, same size).  */

              my_fp = all_regs_rtx[FRAME_POINTER_REGNUM];
            }

          /************  Method 1: Adjust frame pointer  ************/
          
          start_sequence ();

          /* Normally, the dwarf2out frame-related-expr interpreter does
             not expect to have the CFA change once the frame pointer is
             set up.  Thus, we avoid marking the move insn below and
             instead indicate that the entire operation is complete after
             the frame pointer subtraction is done.  */
          
          insn = emit_insn (gen_read_sp (fp, stack_pointer_rtx));
          if (frame_pointer_needed)
            {
              RTX_FRAME_RELATED_P (insn) = 1;
              add_reg_note (insn, REG_CFA_ADJUST_CFA,
                            gen_rtx_SET (VOIDmode, fp, stack_pointer_rtx));
            }

          insn = emit_move_insn (my_fp, plus_constant (my_fp, -size));
          if (frame_pointer_needed)
            {
              RTX_FRAME_RELATED_P (insn) = 1;
              add_reg_note (insn, REG_CFA_ADJUST_CFA,
                            gen_rtx_SET (VOIDmode, fp,
                                         plus_constant (stack_pointer_rtx,
                                                        -size)));
            }
          
          /* Copy to stack pointer.  Note that since we've already
             changed the CFA to the frame pointer this operation
             need not be annotated if frame pointer is needed.
             Always move through unspec, see PR50063.  For meaning
             of irq_state see movhi_sp_r insn.  */

          if (cfun->machine->is_interrupt)
            irq_state = 1;

          if (TARGET_NO_INTERRUPTS
              || cfun->machine->is_signal
              || cfun->machine->is_OS_main)
            irq_state = 0;

          if (AVR_HAVE_8BIT_SP)
            irq_state = 2;

          insn = emit_insn (gen_movhi_sp_r (stack_pointer_rtx,
                                            fp, GEN_INT (irq_state)));
          if (frame_pointer_needed)
            {
              RTX_FRAME_RELATED_P (insn) = 1;
              add_reg_note (insn, REG_CFA_ADJUST_CFA,
                            gen_rtx_SET (VOIDmode, stack_pointer_rtx, fp));
            }
          
          fp_plus_insns = get_insns ();
          end_sequence ();
          
          /************  Method 2: Adjust Stack pointer  ************/

          /* Stack adjustment by means of RCALL . and/or PUSH __TMP_REG__
             can only handle specific offsets.  */
          
          if (avr_sp_immediate_operand (gen_int_mode (-size, HImode), HImode))
            {
              rtx sp_plus_insns;
              
              start_sequence ();

              insn = emit_move_insn (stack_pointer_rtx,
                                     plus_constant (stack_pointer_rtx, -size));
              RTX_FRAME_RELATED_P (insn) = 1;
              add_reg_note (insn, REG_CFA_ADJUST_CFA,
                            gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                         plus_constant (stack_pointer_rtx,
                                                        -size)));
              if (frame_pointer_needed)
                {
                  insn = emit_insn (gen_read_sp (fp, stack_pointer_rtx));
                  RTX_FRAME_RELATED_P (insn) = 1;
                  add_reg_note (insn, REG_CFA_ADJUST_CFA,
                                gen_rtx_SET (VOIDmode, fp, stack_pointer_rtx));
                }

              sp_plus_insns = get_insns ();
              end_sequence ();

              /************ Use shortest method  ************/
                  
              emit_insn (get_sequence_length (sp_plus_insns)
                         < get_sequence_length (fp_plus_insns)
                         ? sp_plus_insns
                         : fp_plus_insns);
            }
          else
            {
              emit_insn (fp_plus_insns);
            }

          cfun->machine->stack_usage += size;
        } /* !minimize && size != 0 */
    } /* !minimize */
}
Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md	(revision 184386)
+++ config/avr/avr.md	(working copy)
@@ -69,6 +69,7 @@ (define_c_enum "unspec"
    UNSPEC_COPYSIGN
    UNSPEC_IDENTITY
    UNSPEC_INSERT_BITS
+   UNSPEC_READ_SP
    ])
 
 (define_c_enum "unspecv"
@@ -581,25 +582,42 @@ (define_peephole2
 ;;============================================================================
 ;; move word (16 bit)
 
+;; Always read SP via unspec, see PR50063
+
+(define_insn "read_sp"
+  [(set (match_operand:HI 0 "register_operand"                  "=r")
+        (unspec:HI [(match_operand:HI 1 "stack_register_operand" "q")]
+                     UNSPEC_READ_SP))]
+  ""
+  {
+    return AVR_HAVE_8BIT_SP
+      ? "in %A0,%A1\;clr %B0"
+      : "in %A0,%A1\;in %B0,%B1";
+  }
+  [(set_attr "length" "2")
+   (set_attr "cc" "clobber")])
+
 ;; Move register $1 to the Stack Pointer register SP.
 ;; This insn is emit during function prologue/epilogue generation.
-;;    $2 = 0: We know that IRQs are off
-;;    $2 = 1: We know that IRQs are on
-;; Remaining cases when the state of the I-Flag is unknown are
-;; handled by generic movhi insn.
+;;    $2 =  0: We know that IRQs are off
+;;    $2 =  1: We know that IRQs are on
+;;    $2 =  2: SP has 8 bits only, IRQ state does not matter
+;;    $2 = -1: We don't know anything about IRQ on/off
 
 (define_insn "movhi_sp_r"
-  [(set (match_operand:HI 0 "stack_register_operand"                "=q,q,q")
-        (unspec_volatile:HI [(match_operand:HI 1 "register_operand"  "r,r,r")
-                             (match_operand:HI 2 "const_int_operand" "L,P,LP")]
+  [(set (match_operand:HI 0 "stack_register_operand"                "=q,q,q,q,q")
+        (unspec_volatile:HI [(match_operand:HI 1 "register_operand"  "r,r,r,r,r")
+                             (match_operand:HI 2 "const_int_operand" "L,P,N,K,LPN")]
                             UNSPECV_WRITE_SP))]
-  "!AVR_HAVE_8BIT_SP"
+  ""
   "@
-	out __SP_H__,%B1\;out __SP_L__,%A1
-	cli\;out __SP_H__,%B1\;sei\;out __SP_L__,%A1
-	out __SP_L__,%A1\;out __SP_H__,%B1"
-  [(set_attr "length" "2,4,2")
-   (set_attr "isa" "no_xmega,no_xmega,xmega")
+	out %B0,%B1\;out %A0,%A1
+	cli\;out %B0,%B1\;sei\;out %A0,%A1
+	in __tmp_reg__,__SREG__\;cli\;out %B0,%B1\;out __SREG__,__tmp_reg__\;out %A0,%A1
+	out %A0,%A1
+	out %A0,%A1\;out %B0,%B1"
+  [(set_attr "length" "2,4,5,1,2")
+   (set_attr "isa" "no_xmega,no_xmega,no_xmega,*,xmega")
    (set_attr "cc" "none")])
 
 (define_peephole2
Index: config/avr/avr.c
===================================================================
--- config/avr/avr.c	(revision 184386)
+++ config/avr/avr.c	(working copy)
@@ -1035,8 +1035,11 @@ avr_prologue_setup_frame (HOST_WIDE_INT
       if (frame_pointer_needed
           && size == 0)
         {
-          insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
+          insn = emit_insn (gen_read_sp (frame_pointer_rtx, stack_pointer_rtx));
           RTX_FRAME_RELATED_P (insn) = 1;
+          add_reg_note (insn, REG_CFA_ADJUST_CFA,
+                        gen_rtx_SET (VOIDmode, frame_pointer_rtx,
+                                     stack_pointer_rtx));
         }
       
       if (size != 0)
@@ -1062,8 +1065,8 @@ avr_prologue_setup_frame (HOST_WIDE_INT
               !frame_pointer_needed can only occur if the function is not a
               leaf function and thus X has already been saved.  */
               
+          int irq_state = -1;
           rtx fp_plus_insns, fp, my_fp;
-          rtx sp_minus_size = plus_constant (stack_pointer_rtx, -size);
 
           gcc_assert (frame_pointer_needed
                       || !isr_p
@@ -1076,7 +1079,7 @@ avr_prologue_setup_frame (HOST_WIDE_INT
           if (AVR_HAVE_8BIT_SP)
             {
               /* The high byte (r29) does not change:
-                 Prefer SUBI (1 cycle) over ABIW (2 cycles, same size).  */
+                 Prefer SUBI (1 cycle) over ADIW (2 cycles, same size).  */
 
               my_fp = all_regs_rtx[FRAME_POINTER_REGNUM];
             }
@@ -1091,44 +1094,50 @@ avr_prologue_setup_frame (HOST_WIDE_INT
              instead indicate that the entire operation is complete after
              the frame pointer subtraction is done.  */
           
-          insn = emit_move_insn (fp, stack_pointer_rtx);
-          if (!frame_pointer_needed)
-            RTX_FRAME_RELATED_P (insn) = 1;
+          insn = emit_insn (gen_read_sp (fp, stack_pointer_rtx));
+          if (frame_pointer_needed)
+            {
+              RTX_FRAME_RELATED_P (insn) = 1;
+              add_reg_note (insn, REG_CFA_ADJUST_CFA,
+                            gen_rtx_SET (VOIDmode, fp, stack_pointer_rtx));
+            }
 
           insn = emit_move_insn (my_fp, plus_constant (my_fp, -size));
-          RTX_FRAME_RELATED_P (insn) = 1;
-          
           if (frame_pointer_needed)
             {
+              RTX_FRAME_RELATED_P (insn) = 1;
               add_reg_note (insn, REG_CFA_ADJUST_CFA,
-                            gen_rtx_SET (VOIDmode, fp, sp_minus_size));
+                            gen_rtx_SET (VOIDmode, fp,
+                                         plus_constant (stack_pointer_rtx,
+                                                        -size)));
             }
           
           /* Copy to stack pointer.  Note that since we've already
              changed the CFA to the frame pointer this operation
-             need not be annotated if frame pointer is needed.  */
-              
-          if (AVR_HAVE_8BIT_SP || AVR_XMEGA)
-            {
-              insn = emit_move_insn (stack_pointer_rtx, fp);
-            }
-          else if (TARGET_NO_INTERRUPTS 
-                   || isr_p
-                   || cfun->machine->is_OS_main)
-            {
-              rtx irqs_are_on = GEN_INT (!!cfun->machine->is_interrupt);
-              
-              insn = emit_insn (gen_movhi_sp_r (stack_pointer_rtx,
-                                                fp, irqs_are_on));
-            }
-          else
-            {
-              insn = emit_move_insn (stack_pointer_rtx, fp);
-            }
+             need not be annotated if frame pointer is needed.
+             Always move through unspec, see PR50063.  For meaning
+             of irq_state see movhi_sp_r insn.  */
+
+          if (cfun->machine->is_interrupt)
+            irq_state = 1;
+
+          if (TARGET_NO_INTERRUPTS
+              || cfun->machine->is_signal
+              || cfun->machine->is_OS_main)
+            irq_state = 0;
 
-          if (!frame_pointer_needed)
-            RTX_FRAME_RELATED_P (insn) = 1;
+          if (AVR_HAVE_8BIT_SP)
+            irq_state = 2;
 
+          insn = emit_insn (gen_movhi_sp_r (stack_pointer_rtx,
+                                            fp, GEN_INT (irq_state)));
+          if (frame_pointer_needed)
+            {
+              RTX_FRAME_RELATED_P (insn) = 1;
+              add_reg_note (insn, REG_CFA_ADJUST_CFA,
+                            gen_rtx_SET (VOIDmode, stack_pointer_rtx, fp));
+            }
+          
           fp_plus_insns = get_insns ();
           end_sequence ();
           
@@ -1143,13 +1152,19 @@ avr_prologue_setup_frame (HOST_WIDE_INT
               
               start_sequence ();
 
-              insn = emit_move_insn (stack_pointer_rtx, sp_minus_size);
+              insn = emit_move_insn (stack_pointer_rtx,
+                                     plus_constant (stack_pointer_rtx, -size));
               RTX_FRAME_RELATED_P (insn) = 1;
-
+              add_reg_note (insn, REG_CFA_ADJUST_CFA,
+                            gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+                                         plus_constant (stack_pointer_rtx,
+                                                        -size)));
               if (frame_pointer_needed)
                 {
-                  insn = emit_move_insn (fp, stack_pointer_rtx);
+                  insn = emit_insn (gen_read_sp (fp, stack_pointer_rtx));
                   RTX_FRAME_RELATED_P (insn) = 1;
+                  add_reg_note (insn, REG_CFA_ADJUST_CFA,
+                                gen_rtx_SET (VOIDmode, fp, stack_pointer_rtx));
                 }
 
               sp_plus_insns = get_insns ();
@@ -1360,7 +1375,7 @@ expand_epilogue (bool sibcall_p)
       
       if (!frame_pointer_needed)
         {
-          emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
+          emit_insn (gen_read_sp (frame_pointer_rtx, stack_pointer_rtx));
         }
 
       if (size)
@@ -1376,7 +1391,8 @@ expand_epilogue (bool sibcall_p)
   if (size)
     {
       /* Try two methods to adjust stack and select shortest.  */
-          
+
+      int irq_state = -1;
       rtx fp, my_fp;
       rtx fp_plus_insns;
 
@@ -1401,28 +1417,20 @@ expand_epilogue (bool sibcall_p)
       start_sequence ();
 
       if (!frame_pointer_needed)
-        emit_move_insn (fp, stack_pointer_rtx);
+        emit_insn (gen_read_sp (fp, stack_pointer_rtx));
 
       emit_move_insn (my_fp, plus_constant (my_fp, size));
 
       /* Copy to stack pointer.  */
-              
-      if (AVR_HAVE_8BIT_SP || AVR_XMEGA)
-        {
-          emit_move_insn (stack_pointer_rtx, fp);
-        }
-      else if (TARGET_NO_INTERRUPTS 
-               || isr_p
-               || cfun->machine->is_OS_main)
-        {
-          rtx irqs_are_on = GEN_INT (!!cfun->machine->is_interrupt);
-          
-          emit_insn (gen_movhi_sp_r (stack_pointer_rtx, fp, irqs_are_on));
-        }
-      else
-        {
-          emit_move_insn (stack_pointer_rtx, fp);
-        }
+
+      if (TARGET_NO_INTERRUPTS)
+        irq_state = 0;
+
+      if (AVR_HAVE_8BIT_SP)
+        irq_state = 2;
+
+      emit_insn (gen_movhi_sp_r (stack_pointer_rtx, fp,
+                                 GEN_INT (irq_state)));
 
       fp_plus_insns = get_insns ();
       end_sequence ();        

Reply via email to