Georg-Johann Lay wrote:
> Richard Henderson wrote:
>> On 08/01/2011 11:42 AM, Georg-Johann Lay wrote:
>>> Is there a specific reason not to define
>>> ACCUMULATE_OUTGOING_ARGS on AVR?
>> Yes.  So that you can use PUSH.  But as I said in PR49881,
>> you probably want to provide -maccumulate-outgoing-args.
>>
>> I have a follow-up patch to the last one in that PR...
>>
>>
>> r~
> 
> PUSH is fine but what about POP?
> 
> It's very expensive to pop several bytes, i.e. disabling IRQs, loading and 
> storing SP and the like.
> Using store+displacement does not have this drawback and, as I wrote, some code 
> degradations you explained
> in PR49881 are artifacts of PR46278, i.e. fake X addressing.
> 
> Johann
> 

Tried this test case:

#include <stdio.h>

/* Reproducer: three back-to-back printf calls, each passing the same
   format string and three small int constants.  Every call therefore
   needs outgoing stack arguments, which is what the attached compiler
   output is meant to show.  */
void foo ()
{
    printf ("%d %d %d", 1, 2, 3);
    printf ("%d %d %d", 3, 4, 5);
    printf ("%d %d %d", 1, 4, 5);
}

Attached the output: The compiler happily pushes onto the stack
but pops only at the end of the function. So in a function with
many such calls that would eat up a great deal of RAM. Is that
what we want?

RETURN_POPS_ARGS cannot help here.

Johann





        .file   "printf.c"
 ;  Symbolic names used by the code below.
 ;  __SREG__, __SP_H__ and __SP_L__ are used as I/O addresses with the
 ;  in/out instructions when the stack pointer is rewritten.
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
 ;  __tmp_reg__ and __zero_reg__ are used as register operands (register
 ;  numbers 0 and 1).  __tmp_reg__ temporarily holds the saved __SREG__
 ;  value; __zero_reg__ is pushed as the high byte of the small integer
 ;  arguments, so it is expected to contain 0.
__tmp_reg__ = 0
__zero_reg__ = 1
 ;  GNU C (GCC) version 4.7.0 20110803 (experimental) (avr)
 ;      compiled by GNU C version 4.3.2 [gcc-4_3-branch revision 141291], GMP version 5.0.1, MPFR version 3.0.0-p8, MPC version 0.8.2
 ;  GGC heuristics: --param ggc-min-expand=30 --param ggc-min-heapsize=4096
 ;  options passed:  printf.c -Os -fverbose-asm
 ;  options enabled:  -fauto-inc-dec -fbranch-count-reg -fcaller-saves
 ;  -fcombine-stack-adjustments -fcommon -fcompare-elim -fcprop-registers
 ;  -fcrossjumping -fcse-follow-jumps -fdebug-types-section -fdefer-pop
 ;  -fdevirtualize -fearly-inlining -feliminate-unused-debug-types
 ;  -fexpensive-optimizations -fforward-propagate -ffunction-cse -fgcse
 ;  -fgcse-lm -fguess-branch-probability -fident -fif-conversion
 ;  -fif-conversion2 -findirect-inlining -finline -finline-functions
 ;  -finline-functions-called-once -finline-small-functions -fipa-cp
 ;  -fipa-profile -fipa-pure-const -fipa-reference -fipa-sra
 ;  -fira-share-save-slots -fira-share-spill-slots -fivopts
 ;  -fkeep-static-consts -fleading-underscore -fmath-errno
 ;  -fmerge-constants -fmerge-debug-strings -fmove-loop-invariants
 ;  -fomit-frame-pointer -foptimize-register-move -foptimize-sibling-calls
 ;  -fpartial-inlining -fpeephole -fpeephole2 -fprefetch-loop-arrays
 ;  -freg-struct-return -fregmove -freorder-blocks -freorder-functions
 ;  -frerun-cse-after-loop -fsched-critical-path-heuristic
 ;  -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock
 ;  -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec
 ;  -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fshow-column
 ;  -fsigned-zeros -fsplit-ivs-in-unroller -fsplit-wide-types
 ;  -fstrict-aliasing -fstrict-overflow -fstrict-volatile-bitfields
 ;  -fthread-jumps -ftoplevel-reorder -ftrapping-math -ftree-bit-ccp
 ;  -ftree-builtin-call-dce -ftree-ccp -ftree-ch -ftree-copy-prop
 ;  -ftree-copyrename -ftree-dce -ftree-dominator-opts -ftree-dse
 ;  -ftree-forwprop -ftree-fre -ftree-loop-if-convert -ftree-loop-im
 ;  -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops=
 ;  -ftree-phiprop -ftree-pre -ftree-pta -ftree-reassoc -ftree-scev-cprop
 ;  -ftree-sink -ftree-slp-vectorize -ftree-sra -ftree-switch-conversion
 ;  -ftree-ter -ftree-vect-loop-version -ftree-vrp -funit-at-a-time
 ;  -fverbose-asm -fzero-initialized-in-bss

        .section        .rodata.str1.1,"aMS",@progbits,1
 ;  Format string shared by all three printf calls.
.LC0:
        .string "%d %d %d"
        .text
.global foo
        .type   foo, @function
 ; -----------------------------------------------------------------------
 ;  void foo (void)
 ;
 ;  Calls printf three times; all arguments are passed on the stack.
 ;  Each call pushes 8 bytes: three 16-bit ints (high byte first, then
 ;  low byte), last argument first, followed by the address of .LC0
 ;  (high byte, then low byte).
 ;
 ;  Values that are reused across calls are kept in saved registers:
 ;      r14 = 3, r15 = 5, r16 = 4, r17 = 1, r29:r28 = address of .LC0
 ;
 ;  The pushed arguments are NOT removed after each call.  They pile up
 ;  and are dropped by one stack-pointer adjustment of 24 bytes
 ;  (3 calls x 8 bytes) just before the epilogue.
 ; -----------------------------------------------------------------------
foo:
        push r14         ;       ;  66  pushqi1/1       [length = 1]
        push r15         ;       ;  67  pushqi1/1       [length = 1]
        push r16         ;       ;  68  pushqi1/1       [length = 1]
        push r17         ;       ;  69  pushqi1/1       [length = 1]
        push r28         ;       ;  70  pushqi1/1       [length = 1]
        push r29         ;       ;  71  pushqi1/1       [length = 1]
/* prologue: function */
/* frame size = 0 */
/* stack size = 6 */
 ;  Note: the value 6 covers only the six registers saved above; the
 ;  24 bytes of outgoing arguments pushed below are not included.
.L__stack_usage = 6
/* call 1: printf (.LC0, 1, 2, 3) */
        push __zero_reg__        ;  5   pushqi1/2       [length = 1]
 ;  The constant goes through r24 because ldi only accepts r16..r31 as
 ;  its destination, so r14 cannot be loaded directly.
        ldi r24,lo8(3)   ; ,     ;  82  *reload_inqi    [length = 2]
        mov r14,r24      ;  tmp42,
        push r14         ;  tmp42        ;  7   pushqi1/1       [length = 1]
        push __zero_reg__        ;  8   pushqi1/2       [length = 1]
        ldi r24,lo8(2)   ;  tmp43,       ;  9   *movqi/2        [length = 1]
        push r24         ;  tmp43        ;  10  pushqi1/1       [length = 1]
        push __zero_reg__        ;  11  pushqi1/2       [length = 1]
        ldi r17,lo8(1)   ;  tmp44,       ;  12  *movqi/2        [length = 1]
        push r17         ;  tmp44        ;  13  pushqi1/1       [length = 1]
        ldi r28,lo8(.LC0)        ;  tmp45,       ;  14  *movhi/4        [length = 2]
        ldi r29,hi8(.LC0)        ;  tmp45,
        push r29         ;  tmp24        ;  16  pushqi1/1       [length = 1]
        push r28         ;  tmp25        ;  19  pushqi1/1       [length = 1]
        rcall printf     ;       ;  20  *call_value_insn/2      [length = 1]
/* call 2: printf (.LC0, 3, 4, 5) -- arguments of call 1 are still on the stack */
        push __zero_reg__        ;  21  pushqi1/2       [length = 1]
 ;  Same detour as above: r15 is loaded via r25.
        ldi r25,lo8(5)   ; ,     ;  83  *reload_inqi    [length = 2]
        mov r15,r25      ;  tmp49,
        push r15         ;  tmp49        ;  23  pushqi1/1       [length = 1]
        push __zero_reg__        ;  24  pushqi1/2       [length = 1]
        ldi r16,lo8(4)   ;  tmp50,       ;  25  *movqi/2        [length = 1]
        push r16         ;  tmp50        ;  26  pushqi1/1       [length = 1]
        push __zero_reg__        ;  27  pushqi1/2       [length = 1]
        push r14         ;  tmp42        ;  29  pushqi1/1       [length = 1]
        push r29         ;  tmp24        ;  32  pushqi1/1       [length = 1]
        push r28         ;  tmp25        ;  35  pushqi1/1       [length = 1]
        rcall printf     ;       ;  36  *call_value_insn/2      [length = 1]
/* call 3: printf (.LC0, 1, 4, 5) -- arguments of calls 1 and 2 are still on the stack */
        push __zero_reg__        ;  37  pushqi1/2       [length = 1]
        push r15         ;  tmp49        ;  39  pushqi1/1       [length = 1]
        push __zero_reg__        ;  40  pushqi1/2       [length = 1]
        push r16         ;  tmp50        ;  42  pushqi1/1       [length = 1]
        push __zero_reg__        ;  43  pushqi1/2       [length = 1]
        push r17         ;  tmp44        ;  45  pushqi1/1       [length = 1]
        push r29         ;  tmp24        ;  48  pushqi1/1       [length = 1]
        push r28         ;  tmp25        ;  51  pushqi1/1       [length = 1]
        rcall printf     ;       ;  52  *call_value_insn/2      [length = 1]
/* drop all 24 bytes of accumulated arguments in one go: SP += 24 */
        in r24,__SP_L__  ;       ;  64  *movhi_sp/2     [length = 2]
        in r25,__SP_H__  ; 
        adiw r24,24      ; ,     ;  53  *addhi3/2       [length = 1]
 ;  SP is written back as two separate bytes, so the update is done with
 ;  interrupts disabled: SREG is saved in __tmp_reg__, cli clears the
 ;  interrupt flag, and SREG is restored between the two SP writes.
 ;  NOTE(review): restoring SREG before the SP_L write presumably relies
 ;  on the following instruction still executing before any interrupt is
 ;  taken -- confirm against the AVR datasheet.
        in __tmp_reg__,__SREG__  ;  65  *movhi_sp/1     [length = 5]
        cli
        out __SP_H__,r25         ; 
        out __SREG__,__tmp_reg__
        out __SP_L__,r24         ; 
/* epilogue start */
        pop r29  ;       ;  74  popqi   [length = 1]
        pop r28  ;       ;  75  popqi   [length = 1]
        pop r17  ;       ;  76  popqi   [length = 1]
        pop r16  ;       ;  77  popqi   [length = 1]
        pop r15  ;       ;  78  popqi   [length = 1]
        pop r14  ;       ;  79  popqi   [length = 1]
        ret      ;  80  return_from_epilogue    [length = 1]
        .size   foo, .-foo
        .ident  "GCC: (GNU) 4.7.0 20110803 (experimental)"
.global __do_copy_data

Reply via email to