Issue 169366
Summary Optimization: Don't waste a call-preserved register if a variable's value can be trivially derived.
Labels new issue
Assignees
Reporter Explorer09
    I'm presenting two cases where a local variable's value can be derived trivially, so it is a waste of a call-preserved register (or stack space) for the compiler to preserve that temporary value across function calls.

* `func1`: `value` is simply retrieved from an object pointed to by a pointer (`ptr`). `ptr` can be kept in a call-preserved register, but there's no need to keep the temporary result of `value`.
* `func2`: `x` is simply `value * 2`. While `value` can be kept in a call-preserved register, there is no need to preserve `x` as `value * 2` can be trivially calculated (e.g. with an LEA instruction in x86).

(Clang is able to correctly optimize the `func3` case, where it knows `x = value + 3` does not need to be preserved. I show the `func3` case here for comparison with the `func2` case.)

```c
extern void subroutine1(int* ptr, int value);
/*
 * Reproducer, "cached value" form.
 *
 * Loads ptr[0] once into `value`, passes that same value to subroutine1
 * twice, and returns it. Both `ptr` and `value` are therefore live across
 * the two calls.
 *
 * Do not restyle: the exact shape of these statements is what the bug
 * report is about.
 */
int func1_a(int* ptr) {
    int value = ptr[0];
    subroutine1(ptr, value);
    subroutine1(ptr, value);
 return value;
}
/*
 * Reproducer, "reload" form.
 *
 * Same first call as func1_a, but the second call's argument and the return
 * value are re-read from ptr[0] instead of reusing `value`, so only `ptr` has
 * to stay live across the calls.
 *
 * NOTE(review): subroutine1 is extern and takes a non-const int*, so it could
 * write through ptr. This form returns the same result as func1_a only if
 * the callee leaves ptr[0] unchanged - confirm that is the intended premise.
 */
int func1_b(int* ptr) {
    int value = ptr[0];
 subroutine1(ptr, value);
    subroutine1(ptr, ptr[0]);
    return ptr[0];
}

extern void subroutine2(int x, int y);
/*
 * Reproducer, "cached value" form.
 *
 * Computes x = value * 2 once and passes (value, x) to subroutine2 twice,
 * then returns value. Both `value` and `x` are live across the two calls.
 *
 * Do not restyle: the exact shape of these statements is what the bug
 * report is about.
 */
int func2_a(int value) {
    int x = value * 2;
    subroutine2(value, x);
    subroutine2(value, x);
    return value;
}
/*
 * Reproducer, "recompute" form.
 *
 * Same first call as func2_a, but the second call recomputes value * 2
 * instead of reusing `x`, then returns value.
 */
int func2_b(int value) {
    int x = value * 2;
 subroutine2(value, x);
    /*
     * Empty asm statement with `value` as a read-write register operand.
     * Presumably here so the compiler cannot assume `value` is unchanged and
     * fold the value * 2 below back into `x`.
     */
    __asm__ ("" : "+r"(value));
 subroutine2(value, value * 2);
    return value;
}

#if 0
/*
 * Comparison case (compiled out by the surrounding #if 0).
 *
 * Computes x = value + 3, calls subroutine2(value, x), then calls
 * subroutine2(value, x + 5), and returns value.
 */
int func3_a(int value) {
    int x = value + 3;
    subroutine2(value, x);
 subroutine2(value, x + 5);
    return value;
}
#endif
```

([Compiler Explorer link](https://godbolt.org/z/5xrrq6Ycc))

x86-64 clang 21.1.0 with `-Os` option produces:

```assembly
func1_a:
        pushq   %rbp
 pushq   %rbx
        pushq   %rax
        movq    %rdi, %rbx
        movl (%rdi), %ebp
        movl    %ebp, %esi
        callq subroutine1@PLT
        movq    %rbx, %rdi
        movl    %ebp, %esi
 callq   subroutine1@PLT
        movl    %ebp, %eax
        addq    $8, %rsp
        popq    %rbx
        popq    %rbp
        retq
func1_b:
 pushq   %rbx
        movq    %rdi, %rbx
        movl    (%rdi), %esi
 callq   subroutine1@PLT
        movl    (%rbx), %esi
        movq %rbx, %rdi
        callq   subroutine1@PLT
        movl    (%rbx), %eax
 popq    %rbx
        retq
func2_a:
        pushq   %rbp
 pushq   %rbx
        pushq   %rax
        movl    %edi, %ebx
        leal (%rbx,%rbx), %ebp
        movl    %ebp, %esi
        callq subroutine2@PLT
        movl    %ebx, %edi
        movl    %ebp, %esi
 callq   subroutine2@PLT
        movl    %ebx, %eax
        addq    $8, %rsp
        popq    %rbx
        popq    %rbp
        retq
func2_b:
 pushq   %rbx
        movl    %edi, %ebx
        leal    (%rbx,%rbx), %esi
        callq   subroutine2@PLT
        leal    (%rbx,%rbx), %esi
 movl    %ebx, %edi
        callq   subroutine2@PLT
        movl %ebx, %eax
        popq    %rbx
        retq
```

(Note: I've also tested with the AArch64 target, and it shows the same missed optimization, i.e. a wasted call-preserved register.)
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to