Issue:     125354
Summary:   Missed CSE opportunity in `(base + align - 1) & -align`
Labels:    missed-optimization
Assignees:
Reporter:  Kmeakin
The code produced for `__builtin_align_up(base, align)` is `(base + align - 1) & -align`.
However, since `-align` is equivalent to `~(align - 1)` (from the two's-complement identity `~y == -y - 1` with `y = align - 1`), we can reassociate the `+` and reuse `mask = align - 1` for both operations, giving `(base + mask) & ~mask`.
This saves one instruction on targets where and-not can be done in a single instruction (e.g. AArch64 with `bic`, x86 with BMI's `andn`, RISC-V with Zbb's `andn`).
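Since unsigned arithmetic wraps, the identity holds for every value of `align` (not just powers of two), and it can be sanity-checked exhaustively at 8 bits. The snippet below is a minimal standalone check, not part of the reproducer:
```c++
#include <cassert>
#include <cstdint>

int main() {
    // Exhaustively verify the identity -x == ~(x - 1) that the
    // reassociation relies on, for every 8-bit value.
    for (unsigned v = 0; v <= 0xFF; ++v) {
        auto x = static_cast<uint8_t>(v);
        assert(static_cast<uint8_t>(-x) == static_cast<uint8_t>(~(x - 1)));
    }
}
```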
# C++
https://godbolt.org/z/oTP4W9ah1
```c++
#include <cstdint>

// Current lowering: recompute the mask as -align.
auto src(uint64_t base, uint64_t align) -> uint64_t {
    return (base + align - 1) & -align;
}

// Proposed lowering: compute align - 1 once and reuse it for both the
// add and the (inverted) mask.
auto tgt(uint64_t base, uint64_t align) -> uint64_t {
    auto mask = align - 1;
    return (base + mask) & ~mask;
}
```
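For completeness, a quick differential check (a sketch appended to the two functions above; not part of the Godbolt link) confirms `src` and `tgt` agree on arbitrary inputs:
```c++
#include <cassert>
#include <random>

int main() {
    // src and tgt must agree for all inputs: -align == ~(align - 1)
    // bit-for-bit, and unsigned addition is associative mod 2^64.
    std::mt19937_64 rng(42);
    for (int i = 0; i < 1000000; ++i) {
        uint64_t base = rng(), align = rng();
        assert(src(base, align) == tgt(base, align));
    }
}
```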
# LLVM-IR
https://alive2.llvm.org/ce/z/afXMqW
```llvm
define i64 @src(i64 noundef %base, i64 noundef %align) {
  %1 = add i64 %base, -1        ; base - 1
  %2 = add i64 %1, %align       ; base + align - 1
  %3 = sub i64 0, %align        ; -align
  %4 = and i64 %2, %3
  ret i64 %4
}
define i64 @tgt(i64 noundef %base, i64 noundef %align) {
  %mask = add i64 %align, -1    ; align - 1
  %sum = add i64 %base, %mask   ; base + align - 1
  %not_mask = xor i64 %mask, -1 ; ~(align - 1) == -align
  %ret = and i64 %sum, %not_mask
  ret i64 %ret
}
```
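One way to check locally whether InstCombine already performs this fold (assuming the `@src` IR above is saved as `src.ll`):
```sh
opt -passes=instcombine -S src.ll
```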
# Assembly
```asm
; AArch64:
src:
add x8, x0, x1  ; x8 = base + align
neg x9, x1      ; x9 = -align
sub x8, x8, #1  ; x8 = base + align - 1
and x0, x8, x9  ; 4 ALU instructions
ret
tgt:
sub x8, x1, #1  ; x8 = align - 1 (mask)
add x9, x0, x8  ; x9 = base + mask
bic x0, x9, x8  ; x0 = x9 & ~x8: 3 ALU instructions
ret
; x86-64 + bmi:
src:
lea rax, [rdi + rsi]  ; rax = base + align
dec rax               ; rax = base + align - 1
neg rsi               ; rsi = -align
and rax, rsi
ret
tgt:
dec rsi               ; rsi = align - 1 (mask)
lea rdi, [rdi + rsi]  ; rdi = base + mask
andn rax, rsi, rdi    ; rax = ~rsi & rdi = (base + mask) & ~mask
ret
; riscv + zbb:
src:
add a0, a0, a1    ; a0 = base + align
addi a0, a0, -1   ; a0 = base + align - 1
neg a1, a1        ; a1 = -align
and a0, a0, a1
ret
tgt:
addi t1, a1, -1   ; t1 = align - 1 (mask)
add t2, a0, t1    ; t2 = base + mask
andn a0, t2, t1   ; a0 = t2 & ~t1 = (base + mask) & ~mask
ret
```