This is a tentative patch to the PR. As Jakub explained, GCC does not support configurations with FP = SP and DSE optimizer generates wrong code for AVR.
This patch implements Jakub's proposal to hack around by hiding the action of setting/moving between SP and SP into UNSPEC[_VOLATILE]s. All works fine with respect to code generation. But what I cannot manage getting debug information right. Without RTX_FRAME_RELATED_P at most insns, I just get one new dwarf-ICE: dwarf2out_frame_debug_adjust_cfa[test2:dwarf2(233)]: pat = (set (reg/f:HI 28 r28) (plus:HI (reg/f:HI 32 __SP_L__) (const_int -40 [0xffffffd8]))) dwf_regno (XEXP (src, 0)) = 32 cur_cfa->reg = 28 ./gcc/testsuite/gcc.c-torture/execute/builtins/snprintf-chk.c: In function 'test2': ./gcc/testsuite/gcc.c-torture/execute/builtins/snprintf-chk.c:159:1: internal compiler error: in dwarf2out_frame_debug_adjust_cfa, at dwarf2cfi.c:1098 However, if all insns that are involved in SP/FP manipulation get the RTX_FRAME_RELATED_P, almost every test case that has -g crashes with this dwarf-ICE. I am stuck now and don't know how to proceed with this. Many thanks, Richard, for your help with the CFA notes. Unfortunately, adding the RTX_FRAME_RELATED_P markers shreds every... Attached the patch as-is and a plain work-copy of avr.c:avr_prologue_setup_frame Johann PR other/50063 * config/avr/avr.md (UNSPEC_READ_SP): New define_c_enum "unspec". (read_sp): New insn. (movhi_sp_r): Handle -1 (unknown IRQ state) and 2 (8-bit SP) in operand 2. * config/avr/avr.c (avr_prologue_setup_frame): Adjust prologue setup to use read_sp instead of vanilla move. Adjust REG_CFA notes to superseed unspec. (expand_epilogue): Adjust epilogue setup to use read_sp instead of vanilla move. As function body might contain CLI or SEI: Use irq_state 0 (IRQ known to be off) only with TARGET_NO_INTERRUPTS. Never use irq_state 1 (IRQ known to be on) here.
static void emit_push_byte (unsigned regno, bool frame_related_p) { rtx mem, reg, insn; mem = gen_rtx_POST_DEC (HImode, stack_pointer_rtx); mem = gen_frame_mem (QImode, mem); reg = gen_rtx_REG (QImode, regno); insn = emit_insn (gen_rtx_SET (VOIDmode, mem, reg)); if (frame_related_p) RTX_FRAME_RELATED_P (insn) = 1; cfun->machine->stack_usage++; } /* Helper for expand_prologue. Emit a push of a SFR via tmp_reg. SFR is a MEM representing the memory location of the SFR. If CLR_P then clear the SFR after the push using zero_reg. */ static void emit_push_sfr (rtx sfr, bool frame_related_p, bool clr_p) { rtx insn; gcc_assert (MEM_P (sfr)); /* IN __tmp_reg__, IO(SFR) */ insn = emit_move_insn (tmp_reg_rtx, sfr); if (frame_related_p) RTX_FRAME_RELATED_P (insn) = 1; /* PUSH __tmp_reg__ */ emit_push_byte (TMP_REGNO, frame_related_p); if (clr_p) { /* OUT IO(SFR), __zero_reg__ */ insn = emit_move_insn (sfr, const0_rtx); if (frame_related_p) RTX_FRAME_RELATED_P (insn) = 1; } } static void avr_prologue_setup_frame (HOST_WIDE_INT size, HARD_REG_SET set) { rtx insn; bool isr_p = cfun->machine->is_interrupt || cfun->machine->is_signal; int live_seq = sequent_regs_live (); bool minimize = (TARGET_CALL_PROLOGUES && live_seq && !isr_p && !cfun->machine->is_OS_task && !cfun->machine->is_OS_main); if (minimize && (frame_pointer_needed || avr_outgoing_args_size() > 8 || (AVR_2_BYTE_PC && live_seq > 6) || live_seq > 7)) { rtx pattern; int first_reg, reg, offset; emit_move_insn (gen_rtx_REG (HImode, REG_X), gen_int_mode (size, HImode)); pattern = gen_call_prologue_saves (gen_int_mode (live_seq, HImode), gen_int_mode (live_seq+size, HImode)); insn = emit_insn (pattern); RTX_FRAME_RELATED_P (insn) = 1; /* Describe the effect of the unspec_volatile call to prologue_saves. Note that this formulation assumes that add_reg_note pushes the notes to the front. Thus we build them in the reverse order of how we want dwarf2out to process them. */ /* The function does always set frame_pointer_rtx, but whether that is going to be permanent in the function is frame_pointer_needed. */ add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (VOIDmode, (frame_pointer_needed ? frame_pointer_rtx : stack_pointer_rtx), plus_constant (stack_pointer_rtx, -(size + live_seq)))); /* Note that live_seq always contains r28+r29, but the other registers to be saved are all below 18. */ first_reg = 18 - (live_seq - 2); for (reg = 29, offset = -live_seq + 1; reg >= first_reg; reg = (reg == 28 ? 17 : reg - 1), ++offset) { rtx m, r; m = gen_rtx_MEM (QImode, plus_constant (stack_pointer_rtx, offset)); r = gen_rtx_REG (QImode, reg); add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, m, r)); } cfun->machine->stack_usage += size + live_seq; } else /* !minimize */ { int reg; for (reg = 0; reg < 32; ++reg) if (TEST_HARD_REG_BIT (set, reg)) emit_push_byte (reg, true); if (frame_pointer_needed && (!(cfun->machine->is_OS_task || cfun->machine->is_OS_main))) { /* Push frame pointer. Always be consistent about the ordering of pushes -- epilogue_restores expects the register pair to be pushed low byte first. */ emit_push_byte (REG_Y, true); emit_push_byte (REG_Y + 1, true); } if (frame_pointer_needed && size == 0) { insn = emit_insn (gen_read_sp (frame_pointer_rtx, stack_pointer_rtx)); RTX_FRAME_RELATED_P (insn) = 1; add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (VOIDmode, frame_pointer_rtx, stack_pointer_rtx)); } if (size != 0) { /* Creating a frame can be done by direct manipulation of the stack or via the frame pointer. These two methods are: fp = sp fp -= size sp = fp or sp -= size fp = sp (*) the optimum method depends on function type, stack and frame size. To avoid a complex logic, both methods are tested and shortest is selected. There is also the case where SIZE != 0 and no frame pointer is needed; this can occur if ACCUMULATE_OUTGOING_ARGS is on. In that case, insn (*) is not needed in that case. We use the X register as scratch. This is save because in X is call-clobbered. In an interrupt routine, the case of SIZE != 0 together with !frame_pointer_needed can only occur if the function is not a leaf function and thus X has already been saved. */ int irq_state = -1; rtx fp_plus_insns, fp, my_fp; gcc_assert (frame_pointer_needed || !isr_p || !current_function_is_leaf); fp = my_fp = (frame_pointer_needed ? frame_pointer_rtx : gen_rtx_REG (Pmode, REG_X)); if (AVR_HAVE_8BIT_SP) { /* The high byte (r29) does not change: Prefer SUBI (1 cycle) over ADIW (2 cycles, same size). */ my_fp = all_regs_rtx[FRAME_POINTER_REGNUM]; } /************ Method 1: Adjust frame pointer ************/ start_sequence (); /* Normally, the dwarf2out frame-related-expr interpreter does not expect to have the CFA change once the frame pointer is set up. Thus, we avoid marking the move insn below and instead indicate that the entire operation is complete after the frame pointer subtraction is done. */ insn = emit_insn (gen_read_sp (fp, stack_pointer_rtx)); if (frame_pointer_needed) { RTX_FRAME_RELATED_P (insn) = 1; add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (VOIDmode, fp, stack_pointer_rtx)); } insn = emit_move_insn (my_fp, plus_constant (my_fp, -size)); if (frame_pointer_needed) { RTX_FRAME_RELATED_P (insn) = 1; add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (VOIDmode, fp, plus_constant (stack_pointer_rtx, -size))); } /* Copy to stack pointer. Note that since we've already changed the CFA to the frame pointer this operation need not be annotated if frame pointer is needed. Always move through unspec, see PR50063. For meaning of irq_state see movhi_sp_r insn. */ if (cfun->machine->is_interrupt) irq_state = 1; if (TARGET_NO_INTERRUPTS || cfun->machine->is_signal || cfun->machine->is_OS_main) irq_state = 0; if (AVR_HAVE_8BIT_SP) irq_state = 2; insn = emit_insn (gen_movhi_sp_r (stack_pointer_rtx, fp, GEN_INT (irq_state))); if (frame_pointer_needed) { RTX_FRAME_RELATED_P (insn) = 1; add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (VOIDmode, stack_pointer_rtx, fp)); } fp_plus_insns = get_insns (); end_sequence (); /************ Method 2: Adjust Stack pointer ************/ /* Stack adjustment by means of RCALL . and/or PUSH __TMP_REG__ can only handle specific offsets. */ if (avr_sp_immediate_operand (gen_int_mode (-size, HImode), HImode)) { rtx sp_plus_insns; start_sequence (); insn = emit_move_insn (stack_pointer_rtx, plus_constant (stack_pointer_rtx, -size)); RTX_FRAME_RELATED_P (insn) = 1; add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (VOIDmode, stack_pointer_rtx, plus_constant (stack_pointer_rtx, -size))); if (frame_pointer_needed) { insn = emit_insn (gen_read_sp (fp, stack_pointer_rtx)); RTX_FRAME_RELATED_P (insn) = 1; add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (VOIDmode, fp, stack_pointer_rtx)); } sp_plus_insns = get_insns (); end_sequence (); /************ Use shortest method ************/ emit_insn (get_sequence_length (sp_plus_insns) < get_sequence_length (fp_plus_insns) ? sp_plus_insns : fp_plus_insns); } else { emit_insn (fp_plus_insns); } cfun->machine->stack_usage += size; } /* !minimize && size != 0 */ } /* !minimize */ }
Index: config/avr/avr.md =================================================================== --- config/avr/avr.md (revision 184386) +++ config/avr/avr.md (working copy) @@ -69,6 +69,7 @@ (define_c_enum "unspec" UNSPEC_COPYSIGN UNSPEC_IDENTITY UNSPEC_INSERT_BITS + UNSPEC_READ_SP ]) (define_c_enum "unspecv" @@ -581,25 +582,42 @@ (define_peephole2 ;;============================================================================ ;; move word (16 bit) +;; Always read SP via unspec, see PR50063 + +(define_insn "read_sp" + [(set (match_operand:HI 0 "register_operand" "=r") + (unspec:HI [(match_operand:HI 1 "stack_register_operand" "q")] + UNSPEC_READ_SP))] + "" + { + return AVR_HAVE_8BIT_SP + ? "in %A0,%A1\;clr %B0" + : "in %A0,%A1\;in %B0,%B1"; + } + [(set_attr "length" "2") + (set_attr "cc" "clobber")]) + ;; Move register $1 to the Stack Pointer register SP. ;; This insn is emit during function prologue/epilogue generation. -;; $2 = 0: We know that IRQs are off -;; $2 = 1: We know that IRQs are on -;; Remaining cases when the state of the I-Flag is unknown are -;; handled by generic movhi insn. +;; $2 = 0: We know that IRQs are off +;; $2 = 1: We know that IRQs are on +;; $2 = 2: SP has 8 bits only, IRQ state does not matter +;; $2 = -1: We don't know anything about IRQ on/off (define_insn "movhi_sp_r" - [(set (match_operand:HI 0 "stack_register_operand" "=q,q,q") - (unspec_volatile:HI [(match_operand:HI 1 "register_operand" "r,r,r") - (match_operand:HI 2 "const_int_operand" "L,P,LP")] + [(set (match_operand:HI 0 "stack_register_operand" "=q,q,q,q,q") + (unspec_volatile:HI [(match_operand:HI 1 "register_operand" "r,r,r,r,r") + (match_operand:HI 2 "const_int_operand" "L,P,N,K,LPN")] UNSPECV_WRITE_SP))] - "!AVR_HAVE_8BIT_SP" + "" "@ - out __SP_H__,%B1\;out __SP_L__,%A1 - cli\;out __SP_H__,%B1\;sei\;out __SP_L__,%A1 - out __SP_L__,%A1\;out __SP_H__,%B1" - [(set_attr "length" "2,4,2") - (set_attr "isa" "no_xmega,no_xmega,xmega") + out %B0,%B1\;out %A0,%A1 + cli\;out %B0,%B1\;sei\;out %A0,%A1 + in __tmp_reg__,__SREG__\;cli\;out %B0,%B1\;out __SREG__,__tmp_reg__\;out %A0,%A1 + out %A0,%A1 + out %A0,%A1\;out %B0,%B1" + [(set_attr "length" "2,4,5,1,2") + (set_attr "isa" "no_xmega,no_xmega,no_xmega,*,xmega") (set_attr "cc" "none")]) (define_peephole2 Index: config/avr/avr.c =================================================================== --- config/avr/avr.c (revision 184386) +++ config/avr/avr.c (working copy) @@ -1035,8 +1035,11 @@ avr_prologue_setup_frame (HOST_WIDE_INT if (frame_pointer_needed && size == 0) { - insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); + insn = emit_insn (gen_read_sp (frame_pointer_rtx, stack_pointer_rtx)); RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, frame_pointer_rtx, + stack_pointer_rtx)); } if (size != 0) @@ -1062,8 +1065,8 @@ avr_prologue_setup_frame (HOST_WIDE_INT !frame_pointer_needed can only occur if the function is not a leaf function and thus X has already been saved. */ + int irq_state = -1; rtx fp_plus_insns, fp, my_fp; - rtx sp_minus_size = plus_constant (stack_pointer_rtx, -size); gcc_assert (frame_pointer_needed || !isr_p @@ -1076,7 +1079,7 @@ avr_prologue_setup_frame (HOST_WIDE_INT if (AVR_HAVE_8BIT_SP) { /* The high byte (r29) does not change: - Prefer SUBI (1 cycle) over ABIW (2 cycles, same size). */ + Prefer SUBI (1 cycle) over ADIW (2 cycles, same size). */ my_fp = all_regs_rtx[FRAME_POINTER_REGNUM]; } @@ -1091,44 +1094,50 @@ avr_prologue_setup_frame (HOST_WIDE_INT instead indicate that the entire operation is complete after the frame pointer subtraction is done. */ - insn = emit_move_insn (fp, stack_pointer_rtx); - if (!frame_pointer_needed) - RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_read_sp (fp, stack_pointer_rtx)); + if (frame_pointer_needed) + { + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, fp, stack_pointer_rtx)); + } insn = emit_move_insn (my_fp, plus_constant (my_fp, -size)); - RTX_FRAME_RELATED_P (insn) = 1; - if (frame_pointer_needed) { + RTX_FRAME_RELATED_P (insn) = 1; add_reg_note (insn, REG_CFA_ADJUST_CFA, - gen_rtx_SET (VOIDmode, fp, sp_minus_size)); + gen_rtx_SET (VOIDmode, fp, + plus_constant (stack_pointer_rtx, + -size))); } /* Copy to stack pointer. Note that since we've already changed the CFA to the frame pointer this operation - need not be annotated if frame pointer is needed. */ - - if (AVR_HAVE_8BIT_SP || AVR_XMEGA) - { - insn = emit_move_insn (stack_pointer_rtx, fp); - } - else if (TARGET_NO_INTERRUPTS - || isr_p - || cfun->machine->is_OS_main) - { - rtx irqs_are_on = GEN_INT (!!cfun->machine->is_interrupt); - - insn = emit_insn (gen_movhi_sp_r (stack_pointer_rtx, - fp, irqs_are_on)); - } - else - { - insn = emit_move_insn (stack_pointer_rtx, fp); - } + need not be annotated if frame pointer is needed. + Always move through unspec, see PR50063. For meaning + of irq_state see movhi_sp_r insn. */ + + if (cfun->machine->is_interrupt) + irq_state = 1; + + if (TARGET_NO_INTERRUPTS + || cfun->machine->is_signal + || cfun->machine->is_OS_main) + irq_state = 0; - if (!frame_pointer_needed) - RTX_FRAME_RELATED_P (insn) = 1; + if (AVR_HAVE_8BIT_SP) + irq_state = 2; + insn = emit_insn (gen_movhi_sp_r (stack_pointer_rtx, + fp, GEN_INT (irq_state))); + if (frame_pointer_needed) + { + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, fp)); + } + fp_plus_insns = get_insns (); end_sequence (); @@ -1143,13 +1152,19 @@ avr_prologue_setup_frame (HOST_WIDE_INT start_sequence (); - insn = emit_move_insn (stack_pointer_rtx, sp_minus_size); + insn = emit_move_insn (stack_pointer_rtx, + plus_constant (stack_pointer_rtx, -size)); RTX_FRAME_RELATED_P (insn) = 1; - + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (stack_pointer_rtx, + -size))); if (frame_pointer_needed) { - insn = emit_move_insn (fp, stack_pointer_rtx); + insn = emit_insn (gen_read_sp (fp, stack_pointer_rtx)); RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, fp, stack_pointer_rtx)); } sp_plus_insns = get_insns (); @@ -1360,7 +1375,7 @@ expand_epilogue (bool sibcall_p) if (!frame_pointer_needed) { - emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); + emit_insn (gen_read_sp (frame_pointer_rtx, stack_pointer_rtx)); } if (size) @@ -1376,7 +1391,8 @@ expand_epilogue (bool sibcall_p) if (size) { /* Try two methods to adjust stack and select shortest. */ - + + int irq_state = -1; rtx fp, my_fp; rtx fp_plus_insns; @@ -1401,28 +1417,20 @@ expand_epilogue (bool sibcall_p) start_sequence (); if (!frame_pointer_needed) - emit_move_insn (fp, stack_pointer_rtx); + emit_insn (gen_read_sp (fp, stack_pointer_rtx)); emit_move_insn (my_fp, plus_constant (my_fp, size)); /* Copy to stack pointer. */ - - if (AVR_HAVE_8BIT_SP || AVR_XMEGA) - { - emit_move_insn (stack_pointer_rtx, fp); - } - else if (TARGET_NO_INTERRUPTS - || isr_p - || cfun->machine->is_OS_main) - { - rtx irqs_are_on = GEN_INT (!!cfun->machine->is_interrupt); - - emit_insn (gen_movhi_sp_r (stack_pointer_rtx, fp, irqs_are_on)); - } - else - { - emit_move_insn (stack_pointer_rtx, fp); - } + + if (TARGET_NO_INTERRUPTS) + irq_state = 0; + + if (AVR_HAVE_8BIT_SP) + irq_state = 2; + + emit_insn (gen_movhi_sp_r (stack_pointer_rtx, fp, + GEN_INT (irq_state))); fp_plus_insns = get_insns (); end_sequence ();