Hello All, With a recently compiled gcc-trunk on x86-64/linux, I am compiling the following example:
################# /* file testmanychar.c */ extern void g (int, char *, char *, char *); void f (void) { char x0, x1, x2, x3, x4, x5, x6, x7; /* assuming x0 is word aligned on a x86_64, and variables are bytes in memory, we could clear all the variables in one machine instruction */ x0 = x1 = x2 = x3 = x4 = x5 = x6 = x7 = (char) 0; g (10, &x0, &x1, &x2); g (20, &x2, &x3, &x4); g (30, &x4, &x5, &x6); g (40, &x6, &x7, &x0); } ################# My intuition was that GCC could store x0 on a 64-bit-aligned byte, and x1 immediately after, and so on, and clear all eight bytes at once using a single machine instruction [clearing a 64-bit word]. But this is not the case, since gcc-trunk -S -O3 -fverbose-asm testmanychar.c gives the following code ################# .type f, @function f: .LFB0: .cfi_startproc movq %rbx, -24(%rsp) #, movq %rbp, -16(%rsp) #, movl $10, %edi #, movq %r12, -8(%rsp) #, subq $40, %rsp #, .cfi_def_cfa_offset 48 leaq 13(%rsp), %rbx #, tmp58 .cfi_offset 12, -16 .cfi_offset 6, -24 .cfi_offset 3, -32 leaq 15(%rsp), %rbp #, tmp60 leaq 14(%rsp), %rdx #, tmp59 leaq 11(%rsp), %r12 #, tmp61 movb $0, 8(%rsp) #, x7 movb $0, 9(%rsp) #, x6 movq %rbx, %rcx # tmp58, movq %rbp, %rsi # tmp60, movb $0, 10(%rsp) #, x5 movb $0, 11(%rsp) #, x4 movb $0, 12(%rsp) #, x3 movb $0, 13(%rsp) #, x2 movb $0, 14(%rsp) #, x1 movb $0, 15(%rsp) #, x0 call g # leaq 12(%rsp), %rdx #, tmp62 movq %r12, %rcx # tmp61, movq %rbx, %rsi # tmp58, movl $20, %edi #, leaq 9(%rsp), %rbx #, tmp64 call g # leaq 10(%rsp), %rdx #, tmp65 movq %rbx, %rcx # tmp64, movq %r12, %rsi # tmp61, movl $30, %edi #, call g # leaq 8(%rsp), %rdx #, tmp68 movq %rbp, %rcx # tmp60, movq %rbx, %rsi # tmp64, movl $40, %edi #, call g # movq 16(%rsp), %rbx #, movq 24(%rsp), %rbp #, movq 32(%rsp), %r12 #, addq $40, %rsp #, .cfi_def_cfa_offset 8 ret .cfi_endproc .LFE0: .size f, .-f .ident "GCC: (GNU) 4.5.0 20100309 (experimental) [trunk revision 157303]" ##################### With gcc-trunk -S -O3 -fverbose-asm 
-march=core2 -mtune=core2 testmanychar.c I am still getting ################## # options passed: testmanychar.c -march=core2 -mtune=core2 -O3 .globl f .type f, @function f: .LFB0: .cfi_startproc movq %rbx, -24(%rsp) #, movq %rbp, -16(%rsp) #, movq %r12, -8(%rsp) #, movl $10, %edi #, subq $40, %rsp #, .cfi_def_cfa_offset 48 leaq 13(%rsp), %rbx #, tmp58 .cfi_offset 12, -16 .cfi_offset 6, -24 .cfi_offset 3, -32 leaq 15(%rsp), %rbp #, tmp60 leaq 11(%rsp), %r12 #, tmp61 leaq 14(%rsp), %rdx #, tmp59 movq %rbx, %rcx # tmp58, movq %rbp, %rsi # tmp60, movb $0, 8(%rsp) #, x7 movb $0, 9(%rsp) #, x6 movb $0, 10(%rsp) #, x5 movb $0, 11(%rsp) #, x4 movb $0, 12(%rsp) #, x3 movb $0, 13(%rsp) #, x2 movb $0, 14(%rsp) #, x1 movb $0, 15(%rsp) #, x0 call g # leaq 12(%rsp), %rdx #, tmp62 movq %r12, %rcx # tmp61, movq %rbx, %rsi # tmp58, movl $20, %edi #, leaq 9(%rsp), %rbx #, tmp64 call g # leaq 10(%rsp), %rdx #, tmp65 movq %rbx, %rcx # tmp64, movq %r12, %rsi # tmp61, movl $30, %edi #, call g # leaq 8(%rsp), %rdx #, tmp68 movq %rbp, %rcx # tmp60, movq %rbx, %rsi # tmp64, movl $40, %edi #, call g # movq 16(%rsp), %rbx #, movq 24(%rsp), %rbp #, movq 32(%rsp), %r12 #, addq $40, %rsp #, .cfi_def_cfa_offset 8 ret .cfi_endproc .LFE0: .size f, .-f .ident "GCC: (GNU) 4.5.0 20100309 (experimental) [trunk revision 157303]" #### I was hoping that movb $0, 8(%rsp) #, x7 movb $0, 9(%rsp) #, x6 movb $0, 10(%rsp) #, x5 movb $0, 11(%rsp) #, x4 movb $0, 12(%rsp) #, x3 movb $0, 13(%rsp) #, x2 movb $0, 14(%rsp) #, x1 movb $0, 15(%rsp) #, x0 could be just something like movq $0, 8(%rsp) or something similar. I do realize that such an optimization is difficult to implement... (probably messing with the register allocator, etc...). Or is the Core2 processor sufficiently smart to execute a sequence of 8 consecutive byte moves exactly as fast as a single 8-byte word move? Regards. 
-- Basile STARYNKEVITCH http://starynkevitch.net/Basile/ email: basile<at>starynkevitch<dot>net mobile: +33 6 8501 2359 8, rue de la Faiencerie, 92340 Bourg La Reine, France *** opinions {are only mines, sont seulement les miennes} ***