https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100104

--- Comment #4 from 康桓瑋 <hewillk at gmail dot com> ---
Also, building "copy" with -O2 on ARM64 produces output identical to -O3
(https://godbolt.org/z/5hjKGbrTd):


// Reviewer annotations (AArch64, GNU syntax) added to the quoted compiler
// output; the instructions themselves are unchanged.
// String constant; no instruction in this listing references it.
.LC0:
        .string "vector::_M_realloc_insert"
// transform(std::vector<double> const&)
// In:  x0 = address of the argument object; its first two 8-byte words are
//      loaded as the input cursor (x20) and the input limit.
//      x8 = address of the object that is filled in and returned in x0.
// NOTE(review): x8 looks like the AAPCS64 indirect-result register and the
// three words at [x8] look like the start / finish / end-of-storage pointers
// of a std::vector<int> -- confirm against the libstdc++ layout.
// Register roles below: x19 = result object, x20 = input cursor (8 bytes per
// element), x21 = elements remaining, x0 / x1 = the words at [x19, 8] and
// [x19, 16] (write position and its limit).
transform(std::vector<double, std::allocator<double> > const&):
        // Prologue: 64-byte frame; save FP/LR and x19/x20.
        stp     x29, x30, [sp, -64]!
        mov     x29, sp
        stp     x19, x20, [sp, 16]
        mov     x19, x8
        ldp     x20, x0, [x0]
        // Zero all three words of the result object.
        stp     xzr, xzr, [x8]
        str     xzr, [x8, 16]
        // x0 = byte distance between the two input words; nothing to do when
        // it is <= 0.
        sub     x0, x0, x20
        cmp     x0, 0
        ble     .L19
        // x21 is callee-saved and only needed on this path, so it is saved
        // here rather than in the prologue. x21 = byte distance / 8.
        mov     x1, 0
        str     x21, [sp, 32]
        asr     x21, x0, 3
        mov     x0, 0
        b       .L23
// Fast path: the write position (x0) differs from its limit (x1), so the
// converted value is stored in place and the write position advances 4 bytes.
.L35:
        str     w2, [x0], 4
        add     x20, x20, 8
        subs    x21, x21, #1
        // Publish the updated write position in the result object.
        str     x0, [x19, 8]
        beq     .L34
// Reload write position and limit from the result object for the next element.
.L36:
        ldp     x0, x1, [x19, 8]
// Loop body: load one double, convert it to a signed 32-bit integer
// (fcvtzs rounds toward zero), and keep a copy on the stack at [sp, 60].
.L23:
        ldr     d0, [x20]
        fcvtzs  w2, d0
        str     w2, [sp, 60]
        cmp     x1, x0
        bne     .L35
        // Slow path (write position == limit): out-of-line call with
        // x0 = result object, x1 = current write position, x2 = address of
        // the stack copy of the converted value.
        add     x2, sp, 60
        mov     x0, x19
        // The operand of the bl below is a single demangled symbol that the
        // mail archive wrapped across three lines.
        bl      void std::vector<int, std::allocator<int>
>::_M_realloc_insert<int>(__gnu_cxx::__normal_iterator<int*, std::vector<int,
std::allocator<int> > >, int&&)
        add     x20, x20, 8
        subs    x21, x21, #1
        bne     .L36
// Loop exit: restore x21 (only saved on the non-empty path).
.L34:
        ldr     x21, [sp, 32]
// Common epilogue: return the result object's address in x0.
.L19:
        mov     x0, x19
        ldp     x19, x20, [sp, 16]
        ldp     x29, x30, [sp], 64
        ret
        // No label or branch shown in this listing leads here.
        // NOTE(review): presumably the exception cleanup path entered by the
        // unwinder (its label is not shown) -- confirm.
        // It frees the result object's buffer, if non-null, passing
        // (word at [x19, 16] - word at [x19]) as the size, then resumes
        // unwinding with the value that arrived in x0.
        ldr     x2, [x19]
        ldr     x1, [x19, 16]
        mov     x19, x0
        sub     x1, x1, x2
        cbz     x2, .L25
        mov     x0, x2
        bl      operator delete(void*, unsigned long)
.L25:
        mov     x0, x19
        bl      _Unwind_Resume
// 8-byte data word holding the address of __gxx_personality_v0.
DW.ref.__gxx_personality_v0:
        .xword  __gxx_personality_v0


===========================================================================



However, "transform" is optimized further under -O3
(https://godbolt.org/z/5hjKGbrTd):

// Reviewer annotations (AArch64, GNU syntax) added to the quoted compiler
// output; the instructions themselves are unchanged.
// Message string passed to std::__throw_length_error at .L33.
.LC0:
        .string "vector::_M_realloc_insert"
// transform(std::vector<double> const&)
// In:  x0 = address of the argument object; its first two 8-byte words are
//      loaded as the input cursor (x20) and the input end (x22).
//      x8 = address of the object that is filled in and returned in x0.
// NOTE(review): x8 looks like the AAPCS64 indirect-result register and the
// three words at [x8] look like the start / finish / end-of-storage pointers
// of a std::vector<int> -- confirm against the libstdc++ layout.
// Unlike a version that calls _M_realloc_insert, this listing contains no such
// call; the buffer-growth logic appears inline in the loop.
// Register roles below:
//   x21 = result object          x20 = input cursor     x22 = input end
//   x19 = output write position  x28 = output limit (end of current buffer)
//   x27 = converted element      x25 = constant 2^61 - 1
//   x24 = old buffer   x23 = bytes in use   x26 = new buffer
transform(std::vector<double, std::allocator<double> > const&):
        // Prologue: 96-byte frame; save FP/LR and x19-x22.
        stp     x29, x30, [sp, -96]!
        mov     x29, sp
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        mov     x21, x8
        ldp     x20, x22, [x0]
        // Zero all three words of the result object.
        stp     xzr, xzr, [x8]
        str     xzr, [x8, 16]
        // Empty input (cursor == end): return immediately.
        cmp     x22, x20
        beq     .L1
        // x23-x28 are saved only on the non-empty path. Write position and
        // limit both start at 0, so the first element takes the growth path.
        mov     x19, 0
        stp     x23, x24, [sp, 48]
        stp     x25, x26, [sp, 64]
        // 2305843009213693951 = 2^61 - 1; compared against the element count
        // below and used as the upper bound of the new count at .L34.
        mov     x25, 2305843009213693951
        stp     x27, x28, [sp, 80]
        mov     x28, 0
        b       .L13
// Fast path: the write position is below the limit, so the converted value is
// stored in place, the write position advances 4 bytes, and the new position
// is published at [x21, 8].
.L32:
        str     w27, [x19], 4
        add     x20, x20, 8
        str     x19, [x21, 8]
        cmp     x22, x20
        beq     .L31
// Loop body: load one double and convert it to a signed 32-bit integer
// (fcvtzs rounds toward zero).
.L13:
        ldr     d0, [x20]
        fcvtzs  w27, d0
        cmp     x19, x28
        bne     .L32
        // Growth path (write position == limit).
        // x24 = current buffer, x23 = bytes in use, x1 = element count.
        ldr     x24, [x21]
        sub     x23, x19, x24
        asr     x1, x23, 2
        // Element count already at 2^61 - 1: throw length_error.
        cmp     x1, x25
        beq     .L33
        // x0 = (count != 0 ? count : 1), then x0 += count: the count doubles,
        // or becomes 1 when it was 0.
        cmp     x1, 0
        csinc   x0, x1, xzr, ne
        adds    x0, x0, x1
        // Unsigned overflow of the addition: use the maximum size (.L7).
        bcs     .L7
        cbnz    x0, .L34
        // New count of zero: no allocation, null buffer and null limit.
        mov     x28, 0
        mov     x26, 0
// Store the new element at new buffer + bytes in use, and set
// x19 = new buffer + bytes in use + 4 (the new write position).
.L9:
        add     x19, x23, 4
        str     w27, [x26, x23]
        add     x19, x26, x19
        // Old contents present: copy them and free the old buffer (.L35).
        cmp     x23, 0
        bgt     .L35
        // Nothing to copy, but a non-null old buffer is still freed (.L36).
        cbnz    x24, .L36
// Publish the new buffer, write position and limit in the result object, then
// continue with the next input element.
.L12:
        add     x20, x20, 8
        stp     x26, x19, [x21]
        str     x28, [x21, 16]
        cmp     x22, x20
        bne     .L13
// Loop exit: restore x23-x28 (only saved on the non-empty path).
.L31:
        ldp     x23, x24, [sp, 48]
        ldp     x25, x26, [sp, 64]
        ldp     x27, x28, [sp, 80]
// Common epilogue: return the result object's address in x0.
.L1:
        mov     x0, x21
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x29, x30, [sp], 96
        ret
// memmove(new buffer, old buffer, bytes in use), then compute the old buffer's
// size in bytes from the limit still stored at [x21, 16].
.L35:
        mov     x1, x24
        mov     x2, x23
        mov     x0, x26
        bl      memmove
        ldr     x1, [x21, 16]
        sub     x1, x1, x24
// operator delete(old buffer, old size in bytes).
.L11:
        mov     x0, x24
        bl      operator delete(void*, unsigned long)
        b       .L12
// Free-only variant: compute the old size and join the delete call at .L11.
.L36:
        ldr     x1, [x21, 16]
        sub     x1, x1, x24
        b       .L11
// Clamp the new count to at most 2^61 - 1 (unsigned compare), then multiply
// by 4 to get the allocation size in bytes.
.L34:
        cmp     x0, x25
        csel    x0, x0, x25, ls
        lsl     x28, x0, 2
// Allocate x28 bytes; x26 = new buffer, x28 = new buffer + size (new limit).
.L8:
        mov     x0, x28
        bl      operator new(unsigned long)
        mov     x26, x0
        add     x28, x0, x28
        b       .L9
// Overflow case: 9223372036854775804 = 4 * (2^61 - 1) bytes, the maximum size.
.L7:
        mov     x28, 9223372036854775804
        b       .L8
// Throw std::length_error with the message at .LC0.
.L33:
        adrp    x0, .LC0
        add     x0, x0, :lo12:.LC0
        bl      std::__throw_length_error(char const*)
        // NOTE(review): the call above presumably does not return, so the
        // code below is presumably the exception cleanup path entered by the
        // unwinder (its label is not shown) -- confirm.
        // It frees the result object's buffer, if non-null, passing
        // (word at [x21, 16] - word at [x21]) as the size, then resumes
        // unwinding with the value that arrived in x0.
        ldr     x2, [x21]
        mov     x19, x0
        ldr     x1, [x21, 16]
        sub     x1, x1, x2
        cbz     x2, .L16
        mov     x0, x2
        bl      operator delete(void*, unsigned long)
.L16:
        mov     x0, x19
        bl      _Unwind_Resume
// 8-byte data word holding the address of __gxx_personality_v0.
DW.ref.__gxx_personality_v0:
        .xword  __gxx_personality_v0

Reply via email to