Issue 148380
Summary [clang] Missed optimization regression: coro.destroy not devirtualized even when possible
Labels clang
Assignees
Reporter eyalz800
    In the example below, clang 18 generates good code, while clang 19 through current trunk fails to do so.
```cpp
#include <coroutine>
    
// Minimal type used by the reproducer. It plays two roles at once: it is the
// return type of a coroutine (it declares a nested promise_type) and it is an
// awaitable (it declares await_ready / await_suspend / await_resume).
struct coro
{
 // Promise for coroutines returning `coro`. It never suspends at the initial
 // or final suspend point and carries no state.
 struct promise_type
    {
        // The returned object is an empty `coro`; it holds no coroutine handle.
        constexpr coro get_return_object() { return coro{}; }
        // std::suspend_never: the coroutine body starts running immediately.
        constexpr auto initial_suspend() noexcept { return std::suspend_never{}; }
        // std::suspend_never: the coroutine does not suspend at its final suspend point.
        constexpr auto final_suspend() noexcept { return std::suspend_never{}; }
        // Exceptions escaping the coroutine body are ignored (empty handler).
        auto unhandled_exception() {}
 constexpr auto return_void() {}
    };

    // Always reports "not ready", so every co_await on a `coro` reaches await_suspend.
    constexpr auto await_ready() { return false; }
    // Destroys the coroutine whose handle is passed in instead of keeping it for
    // a later resume. This destroy() call is the one the report is about.
    constexpr auto await_suspend(auto handle) { handle.destroy(); }
    // Produces no value; the co_await expression has type void.
    constexpr auto await_resume() {}

};

// Only declared here; the definition is not part of the reproducer, so the
// call cannot be inlined into f2.
coro f1() noexcept;
// Coroutine under test: calls f1() and awaits its result. Because
// coro::await_ready() returns false, coro::await_suspend() runs with f2's own
// handle and destroys it.
coro f2() noexcept
{
    co_await f1();
}
```

Clang 18:
```asm
f2():
        jmp     f1()@PLT
```

In Clang 19 and above:
```asm
f2():
        push    rbx
        mov     edi, 24
 call    operator new(unsigned long)@PLT
        mov     rbx, rax
 lea     rax, [rip + f2() (.resume)]
        mov     qword ptr [rbx], rax
 lea     rax, [rip + f2() (.destroy)]
        mov     qword ptr [rbx + 8], rax
        call    f1()@PLT
        mov     rdi, rbx
        mov byte ptr [rbx + 17], 0
        pop     rbx
        jmp     qword ptr [rdi + 8]

f2() (.resume):
        mov     esi, 24
        jmp     operator delete(void*, unsigned long)@PLT

f2() (.destroy):
        mov     esi, 24
        jmp     operator delete(void*, unsigned long)@PLT
```

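It seems to be related to the compiler not realizing that the indirect `jmp     qword ptr [rdi + 8]` can be devirtualized into a direct call to the coroutine's destroy function (`f2() (.destroy)`, i.e. coro.destroy). As a result, the destroy call is not inlined and the code is not optimized further.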

Please see the godbolt link:
https://godbolt.org/z/MnWrKjxEn
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to