I tested the following code at http://gcc.godbolt.org/ with two compilers, g++-4.8 (Ubuntu 4.8.1.2ubuntu1~12.04) 4.8.1 and g++ (GCC) 4.9.0 20130909 (experimental),
and the result with -O3 and USE_ITER defined seems to be rather long.

----------
static void foo(int a, int& dummy)
{
    dummy += a;
}

#define USE_ITER

int main(int argc, char** argv)
{
    //--
    //anti-optimizer
    int dummy = 0;
    int* array = (int*)&argv;
    //--

#if defined(USE_ITER)
    int* pend = &array[10];
    for(int* p = &array[0]; p < pend; ++p)
        foo(*p, dummy);
#else
    for(int i = 0; i < 10; ++i)
        foo(array[i], dummy);
#endif

    return dummy;
}
---------

With -O2, with or without USE_ITER defined, the result is:

main:
    lea rdx, [rsp-8]
    lea rcx, [rsp+32]
    mov QWORD PTR [rsp-8], rsi
    xor eax, eax
.L3:
    add eax, DWORD PTR [rdx]
    add rdx, 4
    cmp rdx, rcx
    jb .L3
    rep; ret

With -O3, without USE_ITER defined:

main:
    mov rax, rsi
    shr rax, 32
    add eax, esi
    add eax, DWORD PTR [rsp]
    add eax, DWORD PTR [rsp+4]
    add eax, DWORD PTR [rsp+8]
    add eax, DWORD PTR [rsp+12]
    add eax, DWORD PTR [rsp+16]
    add eax, DWORD PTR [rsp+20]
    add eax, DWORD PTR [rsp+24]
    add eax, DWORD PTR [rsp+28]
    ret

With -O3, with USE_ITER defined:

main:
    lea rdi, [rsp-16]
    lea rax, [rsp+27]
    mov QWORD PTR [rsp-16], rsi
    lea r8, [rsp+24]
    mov ecx, 1
    lea rdx, [rdi+4]
    lea rsi, [rdi+1]
    sub rax, rdx
    mov rdx, rdi
    shr rax, 2
    add rax, 1
    cmp rsi, r8
    cmovbe rcx, rax
    and edx, 15
    shr rdx, 2
    cmp rcx, rdx
    cmovbe rdx, rcx
    cmp rax, 8
    ja .L30
.L2:
    mov rdx, rcx
.L11:
    cmp rdx, 1
    mov eax, DWORD PTR [rsp-16]
    je .L13
    add eax, DWORD PTR [rsp-12]
    cmp rdx, 2
    je .L14
    add eax, DWORD PTR [rsp-8]
    cmp rdx, 3
    je .L15
    add eax, DWORD PTR [rsp-4]
    cmp rdx, 4
    je .L16
    add eax, DWORD PTR [rsp]
    cmp rdx, 5
    je .L17
    add eax, DWORD PTR [rsp+4]
    cmp rdx, 6
    je .L18
    add eax, DWORD PTR [rsp+8]
    cmp rdx, 7
    je .L19
    add eax, DWORD PTR [rsp+12]
    lea rsi, [rsp+16]
.L4:
    cmp rcx, rdx
    je .L23
.L3:
    sub rcx, rdx
    mov r9, rcx
    shr r9, 2
    lea r10, [0+r9*4]
    test r10, r10
    je .L6
    lea rdx, [rdi+rdx*4]
    cmp r9, 1
    movdqu xmm0, XMMWORD PTR [rdx]
    jbe .L7
    movdqu xmm1, XMMWORD PTR [rdx+16]
    cmp r9, 2
    paddd xmm0, xmm1
    je .L7
    movdqu xmm1, XMMWORD PTR [rdx+32]
    paddd xmm0, xmm1
.L7:
    movdqa xmm2, xmm0
    lea rsi, [rsi+r10*4]
    psrldq xmm2, 8
    paddd xmm0, xmm2
    movdqa xmm3, xmm0
    psrldq xmm3, 4
    paddd xmm0, xmm3
    movd edx, xmm0
    add eax, edx
    cmp rcx, r10
    je .L23
.L6:
    lea rdx, [rsi+4]
    add eax, DWORD PTR [rsi]
    cmp r8, rdx
    jbe .L23
    lea rdx, [rsi+8]
    add eax, DWORD PTR [rsi+4]
    cmp r8, rdx
    jbe .L31
    add eax, DWORD PTR [rsi+8]
    ret
.L23:
    rep; ret
.L30:
    cmp rsi, r8
    ja .L2
    xor eax, eax
    test rdx, rdx
    mov rsi, rdi
    je .L3
    jmp .L11
.L31:
    ret
.L16:
    mov rsi, rsp
    jmp .L4
.L17:
    lea rsi, [rsp+4]
    jmp .L4
.L18:
    lea rsi, [rsp+8]
    jmp .L4
.L19:
    lea rsi, [rsp+12]
    jmp .L4
.L14:
    lea rsi, [rsp-8]
    jmp .L4
.L15:
    lea rsi, [rsp-4]
    jmp .L4
.L13:
    lea rsi, [rdi+4]
    jmp .L4