https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109441

--- Comment #1 from AK <hiraditya at msn dot com> ---
I guess a better test case is this:

#include<vector>
using namespace std;

using T = int;


/// Overwrites every element of the by-value copy `v` with `T()` and then
/// returns the sum of the elements.
///
/// Because every element is reset before it is summed, the result for
/// `T = int` is always 0, regardless of the contents or size of the input.
///
/// @param v  Vector to process; taken by value, so the caller's data is
///           not modified.
/// @return   Sum of the (zeroed) elements; `T()` for an empty vector.
T v(std::vector<T> v) {
    // Value-initialize the accumulator. Leaving it default-initialized
    // (`T s;`) makes the `s += ...` below, and the return for an empty
    // vector, read an indeterminate value, which is undefined behaviour.
    T s{};
    std::fill(v.begin(), v.end(), T());
    // The index type is intentionally left as `int`, exactly as in the
    // posted test case.
    for (auto i = 0; i < v.size(); ++i) {
        s += v[i];
    }

    return s;
}

which has a similar effect.

$ g++ -O3 -std=c++17

# Generated code for the test case above: clears the vector's storage with
# memset, then sums the elements with a 4-lane SSE2 loop followed by a scalar
# tail of up to three elements. The sum is returned in eax.
v(std::vector<int, std::allocator<int> >):
        # rbp and rbx are used as scratch below and restored before each ret.
        push    rbp
        push    rbx
        sub     rsp, 8
        # Load two pointers from the vector object at [rdi]; they are the
        # range passed to std::fill (presumably begin at +0 and end at +8).
        mov     rbp, QWORD PTR [rdi+8]
        mov     rcx, QWORD PTR [rdi]
        # Equal pointers: empty range, return 0 via .L7.
        cmp     rcx, rbp
        je      .L7
        # rbp = length of the range in bytes.
        sub     rbp, rcx
        # Set up memset(begin, 0, byte length); rbx keeps a copy of begin
        # across the call.
        mov     rdi, rcx
        xor     esi, esi
        mov     rbx, rcx
        mov     rdx, rbp
        call    memset
        # rdi = byte length / 4 = element count.
        # rdx = element count if the byte length is non-zero, otherwise 1.
        mov     rdi, rbp
        mov     edx, 1
        mov     rcx, rbx
        sar     rdi, 2
        test    rbp, rbp
        cmovne  rdx, rdi
        # 12 bytes or fewer (at most three ints): skip the vector loop and
        # use only the scalar code at .L3.
        cmp     rbp, 12
        jbe     .L8
        # xmm0 = four zeroed partial sums.
        # rax = begin + (rdx / 4) * 16, the end of the vectorized region.
        mov     rax, rdx
        pxor    xmm0, xmm0
        shr     rax, 2
        sal     rax, 4
        add     rax, rbx
.L4:
        # Vector loop: add four ints per iteration into xmm0.
        movdqu  xmm2, XMMWORD PTR [rbx]
        add     rbx, 16
        paddd   xmm0, xmm2
        cmp     rbx, rax
        jne     .L4
        # Horizontal reduction: fold the four lanes of xmm0 into lane 0,
        # then move the total into eax.
        movdqa  xmm1, xmm0
        psrldq  xmm1, 8
        paddd   xmm0, xmm1
        movdqa  xmm1, xmm0
        psrldq  xmm1, 4
        paddd   xmm0, xmm1
        movd    eax, xmm0
        # If rdx is a multiple of four there are no leftover elements.
        test    dl, 3
        je      .L1
        # rdx = esi = index of the first element not handled by the vector
        # loop.
        and     rdx, -4
        mov     esi, edx
.L3:
        # Scalar tail: add up to three remaining elements, comparing each
        # sign-extended 32-bit index against the element count in rdi.
        add     eax, DWORD PTR [rcx+rdx*4]
        lea     edx, [rsi+1]
        movsx   rdx, edx
        cmp     rdx, rdi
        jnb     .L1
        add     esi, 2
        lea     r8, [0+rdx*4]
        add     eax, DWORD PTR [rcx+rdx*4]
        movsx   rsi, esi
        cmp     rsi, rdi
        jnb     .L1
        add     eax, DWORD PTR [rcx+4+r8]
.L1:
        # Common exit: the result is already in eax.
        add     rsp, 8
        pop     rbx
        pop     rbp
        ret
.L7:
        # Empty-range exit: return 0.
        add     rsp, 8
        xor     eax, eax
        pop     rbx
        pop     rbp
        ret
.L8:
        # Short-range entry to the scalar tail: sum = 0, starting index = 0.
        xor     eax, eax
        xor     esi, esi
        xor     edx, edx
        jmp     .L3

Reply via email to