Issue 141630
Summary Converting loop pointer bump to index increases register pressure
Labels new issue
Assignees
Reporter dzaima
    https://godbolt.org/z/oEzY8G6PK

The code:

```c
#include<stddef.h>

/* Cold-path handler; only declared here, defined elsewhere.
 * The noreturn attribute tells the compiler a call to it never comes back
 * to the caller. foo() passes it the element index of the offending value. */
__attribute__((noreturn))
void unexpected(size_t i);

/* Opaque callee taking five ints; only declared here, defined elsewhere.
 * foo() calls it once per element. */
void external(int,int,int,int,int);

/*
 * Walks the n ints beginning at start using a bumped pointer and calls
 * external() once per element, forwarding the element value together with
 * the four loop-invariant arguments arg1..arg4.
 *
 * If an element is negative (hinted as unlikely), unexpected() is called
 * with that element's index; it is declared noreturn, so the loop does not
 * continue past it.
 */
void foo(int* start, size_t n, int arg1, int arg2, int arg3, int arg4) {
    int* curr = start;
    /* One-past-the-end pointer; the loop runs while curr is below it. */
    int* end = start + n;
    while (curr < end) {
        int val = *curr;
        /* __builtin_expect(..., 0) marks the negative-value branch as unlikely. */
        if (__builtin_expect(val < 0, 0)) {
            /* Inside the loop, start is needed only here, to turn curr back
             * into an element index (the pointer difference is converted to
             * size_t for the call). */
 unexpected(curr - start);
        }
        /* arg1..arg4 never change, so they must stay available across every call. */
        external(val, arg1, arg2, arg3, arg4);
        curr++;
    }
}
```

With `-O3`, LLVM compiles this to the following core loop:
```asm
.LBB0_2:
        mov     edi, dword ptr [r12 + r13]
        test    edi, edi
        js      .LBB0_5
        mov     esi, r15d
        mov     edx, r14d
        mov     ecx, ebp
        mov     r8d, dword ptr [rsp + 4] ; avoidable!
        call    external
        lea     rax, [r12 + r13]
        add     rax, 4
        add     r13, 4
        cmp     rax, rbx
        jb      .LBB0_2
```
This loop contains a stack reload (the `mov r8d, dword ptr [rsp + 4]`), as a consequence of LLVM replacing the pointer bump with a (scaled) index, thus taking two registers (`r12` and `r13`) to store `curr`. gcc, by contrast, produces:
```asm
.L3:
        mov     r8d, r13d
        mov     ecx, r12d
        mov     edx, ebp
        mov     esi, ebx
        call    "external"
        add     r15, 4
        cmp     r15, r14
        jnb     .L1
.L4:    mov     edi, DWORD PTR [r15]
        test    edi, edi
        jns     .L3
```
The gcc loop doesn't contain the reload. It also saves two instructions on computing `curr` for the comparison, by not having to do it at all, whereas LLVM recomputes it, and does so inefficiently: the `lea; add; add` sequence could trivially be `add; lea` (though this is unrelated to the main issue). As such, the gcc loop is two instructions shorter.
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to