https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118033

            Bug ID: 118033
           Summary: [Missing optimization] Keep __builtin_unreachable for
                    asserts in the release build
           Product: gcc
           Version: 14.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: dmitriy.ovdienko at gmail dot com
  Target Milestone: ---

Could you define the `assert` macro as follows when the `NDEBUG` macro is
defined:

// Proposed NDEBUG definition of assert: expands to an if/else statement whose
// else branch (condition false) calls __builtin_unreachable(). Note that
// `expr` appears in the if condition, so it is part of the expanded code.
#if defined(NDEBUG)
# define assert(expr) if ((expr)) {} else { __builtin_unreachable(); }
#endif

Let's assume there is the following function:

    // Example function for this report: packs the first `size` bytes of `data`
    // into a uint32_t, with data[0] ending up in the least significant byte.
    uint32_t assign32(char const* data, size_t size) noexcept
    {
        // States the precondition size <= 4; this is the check whose
        // behavior under NDEBUG the report is about.
        assert(size <= 4);

        uint32_t result{0};

        // Walk from data[size-1] down to data[0], shifting earlier bytes up.
        while(size)
        {
            size -= 1;
            result <<= 8;
            // Go through unsigned char so a negative char value is not
            // sign-extended when OR-ed into result.
            result |= static_cast<unsigned char>(data[size]);
        }

        return result;
    }

When the NDEBUG macro is not defined, the compiler unrolls the loop into the
following code:

assign32(char const*, unsigned long):
        cmp     rsi, 4
        ja      .L2
        test    rsi, rsi
        je      .L18
        movzx   eax, BYTE PTR [rdi-1+rsi]
        cmp     rsi, 1
        je      .L1
        movzx   edx, BYTE PTR [rdi-2+rsi]
        sal     eax, 8
        or      eax, edx
        cmp     rsi, 2
        je      .L1
        movzx   edx, BYTE PTR [rdi-3+rsi]
        sal     eax, 8
        or      eax, edx
        cmp     rsi, 3
        je      .L1
        movzx   edx, BYTE PTR [rdi]
        sal     eax, 8
        or      eax, edx
.L1:
        ret
.L18:
        xor     eax, eax
        ret
.L2:
        push    rax
        mov     ecx, OFFSET FLAT:.LC0
        mov     edx, 11
        mov     esi, OFFSET FLAT:.LC1
        mov     edi, OFFSET FLAT:.LC2
        call    __assert_fail
buffer_:
        .zero   4

If the NDEBUG macro is defined, the compiler does not unroll the loop, and the
output assembly is as follows:

assign32(char const*, unsigned long):
        xor     eax, eax
        test    rsi, rsi
        je      .L4
.L3:
        sub     rsi, 1
        sal     eax, 8
        movzx   edx, BYTE PTR [rdi+rsi]
        or      eax, edx
        test    rsi, rsi
        jne     .L3
        ret
.L4:
        ret

So, from my point of view, we are missing this optimization in the Release build.

Reply via email to