Georg-Johann Lay wrote:
> Richard Henderson wrote:
>> On 08/01/2011 11:42 AM, Georg-Johann Lay wrote:
>>> Is there a specific reason not to define
>>> ACCUMULATE_OUTGOING_ARGS on AVR?
>> Yes. So that you can use PUSH. But as I said in PR49881,
>> you probably want to provide -maccumulate-outgoing-args.
>>
>> I have a follow-up patch to the last one in that PR...
>>
>>
>> r~
>
> PUSH is fine but what about POP?
>
> It's very expensive to pop several bytes, i.e. disabling IRQs, loading and
> storing SP and the like.
> Using store+displacement does not have this drawback and, as I wrote, some code
> degradations you explained
> in PR49881 are artifacts of PR46278, i.e. fake X addressing.
>
> Johann
>
Tried this test case:

#include <stdio.h>

void foo ()
{
    printf ("%d %d %d", 1, 2, 3);
    printf ("%d %d %d", 3, 4, 5);
    printf ("%d %d %d", 1, 4, 5);
}

Attached the output: The compiler happily pushes onto the stack but pops only at the end of the function. So in a function with many such calls that would eat up a great deal of RAM. Is that what we want? RETURN_POPS_ARGS cannot help here.

Johann
; -----------------------------------------------------------------------
; Verbose assembly listing for foo() from printf.c, as emitted by
; GCC 4.7.0 20110803 (experimental) for AVR with -Os -fverbose-asm
; (see the option dump below).
;
; foo() calls printf three times with the same format string .LC0.
; Every argument is passed on the stack with PUSH: three 16-bit values
; plus the 16-bit address of .LC0, i.e. 8 bytes per call.  Nothing is
; popped between the calls; the 3 * 8 = 24 bytes are released by a single
; stack-pointer adjustment just before the epilogue.
; NOTE(review): -fdefer-pop appears in the enabled-options list below and
; is presumably what produces this deferred cleanup -- confirm.
;
; Registers that hold values across the calls (all saved in the prologue
; and restored in the epilogue):
;   r14     = 3      r15 = 5      r16 = 4      r17 = 1
;   r29:r28 = address of .LC0
; r24/r25 are used only as short-lived scratch registers.
; -----------------------------------------------------------------------
        .file "printf.c"
; Symbolic names used below for I/O registers (status register, stack
; pointer high/low) and for the two fixed registers r0 / r1.
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
__tmp_reg__ = 0
__zero_reg__ = 1
; GNU C (GCC) version 4.7.0 20110803 (experimental) (avr)
; compiled by GNU C version 4.3.2 [gcc-4_3-branch revision 141291], GMP version 5.0.1, MPFR version 3.0.0-p8, MPC version 0.8.2
; GGC heuristics: --param ggc-min-expand=30 --param ggc-min-heapsize=4096
; options passed: printf.c -Os -fverbose-asm
; options enabled: -fauto-inc-dec -fbranch-count-reg -fcaller-saves
; -fcombine-stack-adjustments -fcommon -fcompare-elim -fcprop-registers
; -fcrossjumping -fcse-follow-jumps -fdebug-types-section -fdefer-pop
; -fdevirtualize -fearly-inlining -feliminate-unused-debug-types
; -fexpensive-optimizations -fforward-propagate -ffunction-cse -fgcse
; -fgcse-lm -fguess-branch-probability -fident -fif-conversion
; -fif-conversion2 -findirect-inlining -finline -finline-functions
; -finline-functions-called-once -finline-small-functions -fipa-cp
; -fipa-profile -fipa-pure-const -fipa-reference -fipa-sra
; -fira-share-save-slots -fira-share-spill-slots -fivopts
; -fkeep-static-consts -fleading-underscore -fmath-errno
; -fmerge-constants -fmerge-debug-strings -fmove-loop-invariants
; -fomit-frame-pointer -foptimize-register-move -foptimize-sibling-calls
; -fpartial-inlining -fpeephole -fpeephole2 -fprefetch-loop-arrays
; -freg-struct-return -fregmove -freorder-blocks -freorder-functions
; -frerun-cse-after-loop -fsched-critical-path-heuristic
; -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock
; -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec
; -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fshow-column
; -fsigned-zeros -fsplit-ivs-in-unroller -fsplit-wide-types
; -fstrict-aliasing -fstrict-overflow -fstrict-volatile-bitfields
; -fthread-jumps -ftoplevel-reorder -ftrapping-math -ftree-bit-ccp
; -ftree-builtin-call-dce -ftree-ccp -ftree-ch -ftree-copy-prop
; -ftree-copyrename -ftree-dce -ftree-dominator-opts -ftree-dse
; -ftree-forwprop -ftree-fre -ftree-loop-if-convert -ftree-loop-im
; -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops=
; -ftree-phiprop -ftree-pre -ftree-pta -ftree-reassoc -ftree-scev-cprop
; -ftree-sink -ftree-slp-vectorize -ftree-sra -ftree-switch-conversion
; -ftree-ter -ftree-vect-loop-version -ftree-vrp -funit-at-a-time
; -fverbose-asm -fzero-initialized-in-bss

; Format string shared by all three printf calls.
        .section .rodata.str1.1,"aMS",@progbits,1
.LC0:
        .string "%d %d %d"
        .text
.global foo
        .type foo, @function
foo:
; Save the six registers this function uses to keep values alive across
; the printf calls; they are restored in reverse order in the epilogue.
        push r14 ; ; 66 pushqi1/1 [length = 1]
        push r15 ; ; 67 pushqi1/1 [length = 1]
        push r16 ; ; 68 pushqi1/1 [length = 1]
        push r17 ; ; 69 pushqi1/1 [length = 1]
        push r28 ; ; 70 pushqi1/1 [length = 1]
        push r29 ; ; 71 pushqi1/1 [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 6 */
.L__stack_usage = 6

; ---- Call 1: printf (.LC0, 1, 2, 3) --------------------------------
; Arguments are pushed last-to-first.  Each 16-bit value is pushed as
; __zero_reg__ (the high byte) followed by the register holding the low
; byte.
        push __zero_reg__ ; 5 pushqi1/2 [length = 1]
; The constant 3 goes through r24 before landing in r14, where it is
; kept for reuse by call 2.
        ldi r24,lo8(3) ; , ; 82 *reload_inqi [length = 2]
        mov r14,r24 ; tmp42,
        push r14 ; tmp42 ; 7 pushqi1/1 [length = 1]
        push __zero_reg__ ; 8 pushqi1/2 [length = 1]
; The constant 2 is used only by this call, so it stays in scratch r24.
        ldi r24,lo8(2) ; tmp43, ; 9 *movqi/2 [length = 1]
        push r24 ; tmp43 ; 10 pushqi1/1 [length = 1]
        push __zero_reg__ ; 11 pushqi1/2 [length = 1]
; r17 = 1, reused by call 3.
        ldi r17,lo8(1) ; tmp44, ; 12 *movqi/2 [length = 1]
        push r17 ; tmp44 ; 13 pushqi1/1 [length = 1]
; r29:r28 = address of the format string, reused by calls 2 and 3.
        ldi r28,lo8(.LC0) ; tmp45, ; 14 *movhi/4 [length = 2]
        ldi r29,hi8(.LC0) ; tmp45,
        push r29 ; tmp24 ; 16 pushqi1/1 [length = 1]
        push r28 ; tmp25 ; 19 pushqi1/1 [length = 1]
        rcall printf ; ; 20 *call_value_insn/2 [length = 1]
; The 8 argument bytes of call 1 are left on the stack here.

; ---- Call 2: printf (.LC0, 3, 4, 5) --------------------------------
        push __zero_reg__ ; 21 pushqi1/2 [length = 1]
; The constant 5 goes through r25 before landing in r15, where it is
; kept for reuse by call 3.
        ldi r25,lo8(5) ; , ; 83 *reload_inqi [length = 2]
        mov r15,r25 ; tmp49,
        push r15 ; tmp49 ; 23 pushqi1/1 [length = 1]
        push __zero_reg__ ; 24 pushqi1/2 [length = 1]
; r16 = 4, reused by call 3.
        ldi r16,lo8(4) ; tmp50, ; 25 *movqi/2 [length = 1]
        push r16 ; tmp50 ; 26 pushqi1/1 [length = 1]
        push __zero_reg__ ; 27 pushqi1/2 [length = 1]
; r14 still holds the 3 loaded for call 1.
        push r14 ; tmp42 ; 29 pushqi1/1 [length = 1]
        push r29 ; tmp24 ; 32 pushqi1/1 [length = 1]
        push r28 ; tmp25 ; 35 pushqi1/1 [length = 1]
        rcall printf ; ; 36 *call_value_insn/2 [length = 1]
; 16 argument bytes are now outstanding on the stack.

; ---- Call 3: printf (.LC0, 1, 4, 5) --------------------------------
; Every value is already in a register from the earlier calls, so this
; call consists of pushes only.
        push __zero_reg__ ; 37 pushqi1/2 [length = 1]
        push r15 ; tmp49 ; 39 pushqi1/1 [length = 1]
        push __zero_reg__ ; 40 pushqi1/2 [length = 1]
        push r16 ; tmp50 ; 42 pushqi1/1 [length = 1]
        push __zero_reg__ ; 43 pushqi1/2 [length = 1]
        push r17 ; tmp44 ; 45 pushqi1/1 [length = 1]
        push r29 ; tmp24 ; 48 pushqi1/1 [length = 1]
        push r28 ; tmp25 ; 51 pushqi1/1 [length = 1]
        rcall printf ; ; 52 *call_value_insn/2 [length = 1]

; ---- Release all 24 argument bytes at once --------------------------
; Read SP into r25:r24 and add 24 (3 calls * 8 bytes).
        in r24,__SP_L__ ; ; 64 *movhi_sp/2 [length = 2]
        in r25,__SP_H__ ;
        adiw r24,24 ; , ; 53 *addhi3/2 [length = 1]
; Write the new SP: the status register is saved in __tmp_reg__,
; interrupts are disabled with cli, and SREG is written back between the
; high-byte and low-byte writes of SP.
        in __tmp_reg__,__SREG__ ; 65 *movhi_sp/1 [length = 5]
        cli
        out __SP_H__,r25 ;
        out __SREG__,__tmp_reg__
        out __SP_L__,r24 ;
/* epilogue start */
; Restore the saved registers in reverse push order and return.
        pop r29 ; ; 74 popqi [length = 1]
        pop r28 ; ; 75 popqi [length = 1]
        pop r17 ; ; 76 popqi [length = 1]
        pop r16 ; ; 77 popqi [length = 1]
        pop r15 ; ; 78 popqi [length = 1]
        pop r14 ; ; 79 popqi [length = 1]
        ret ; 80 return_from_epilogue [length = 1]
        .size foo, .-foo
        .ident "GCC: (GNU) 4.7.0 20110803 (experimental)"
.global __do_copy_data