Issue 137514
Summary Missed optimization: Suboptimal lowering of 128-bit integer div / rem by a constant
Labels new issue
Assignees
Reporter MaxGraey
    For most constant divisors, clang lowers 128-bit integer division to a libcall instead of using a magic constant + an inlined mul_high_128, as GCC does.

Let's take `(__uint128_t)x / 19` for instance:

```c++
uint64_t div_by_19(__uint128_t x) {
    return x / 19;
}
```

clang (trunk):

```asm
div_by_19(unsigned __int128):
 push    rax
        mov     edx, 19
        xor     ecx, ecx
 call    __udivti3@PLT
        pop     rcx
        ret
```

Expected / GCC (trunk):

```asm
div_by_19(unsigned __int128):
        movabs  rcx, 18014398509481983
        mov     r8, rdi
        mov     rax, r8
 and     rdi, rcx
        shrd    rax, rsi, 54
        shr     rsi, 44
 and     rax, rcx
        add     rdi, rax
        movabs  rax, -2912643801112034465
        add     rsi, rdi
        mul     rsi
 shr     rdx, 4
        lea     rax, [rdx+rdx*8]
        lea     rax, [rdx+rax*2]
        sub     rsi, rax
        movabs  rax, -8737931403336103397
        sub     r8, rsi
        imul    rax, r8
 ret
```

Link to more examples: https://godbolt.org/z/qhM6975b4
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to