Issue 125354
Summary Missed CSE opportunity in `(base + align - 1) & -align`
Labels missed-optimization
Assignees
Reporter Kmeakin
    The code produced for `__builtin_align_up(base, align)` is `(base + align - 1) & -align`.
However, since `-align` is equivalent to `~(align - 1)`, we can reassocate the `+` to get `(base + align_minus_1) & ~align_minus_1`.
This saves one instruction on targets where and-not can be done in a single instruction (eg AArch64, x86 with BMI, riscv with zbb)

# C++

https://godbolt.org/z/oTP4W9ah1
```c++
// Reproducer ("source" form): the expression the report says is produced for
// `__builtin_align_up(base, align)`. It adds `align - 1` to `base` and then
// masks the sum with `-align`.
// NOTE(review): using `-align` as a mask presumably assumes `align` is a
// power of two — confirm against the builtin's contract.
auto src(uint64_t base, uint64_t align) -> uint64_t {
    return (base + align - 1) & -align;
}

// Desired ("target") form: computes `align - 1` once and reuses it both as the
// addend and, inverted, as the mask. Relies on `-align == ~(align - 1)`.
auto tgt(uint64_t base, uint64_t align) -> uint64_t {
    auto mask = align - 1;          // shared subexpression
    return (base + mask) & ~mask;   // `& ~mask` can map to a single and-not instruction
}
```

# LLVM-IR

https://alive2.llvm.org/ce/z/afXMqW
```llvm
; Current IR for the reproducer: two adds, a negate, and an and.
define i64 @src(i64 noundef %base, i64 noundef %align) {
  ; %4 = base - 1
  %4 = add i64 %base, -1
  ; %5 = base - 1 + align
  %5 = add i64 %4, %align
  ; %6 = -align (the mask)
  %6 = sub i64 0, %align
  ; %7 = (base + align - 1) & -align
  %7 = and i64 %5, %6
  ret i64 %7
}

; Desired IR: %mask is computed once and feeds both the add and the inverted mask.
define i64 @tgt(i64 noundef %base, i64 noundef %align) {
  ; %mask = align - 1
  %mask = add i64 %align, -1
  ; %sum = base + (align - 1)
  %sum = add i64 %base, %mask
  ; %not_mask = ~mask (xor with all-ones)
  %not_mask = xor i64 %mask, -1
  ; %ret = sum & ~mask, an and-not pattern
  %ret = and i64 %sum, %not_mask
  ret i64 %ret
}
```

# Assembly

```asm
; AArch64:
; Current AArch64 output: 4 instructions before ret.
; NOTE(review): presumably x0 = base and x1 = align (first two integer arguments) — confirm.
src:
        add     x8, x0, x1      ; x8 = x0 + x1
        neg x9, x1      ; x9 = -x1
        sub     x8, x8, #1      ; x8 = x0 + x1 - 1
        and     x0, x8, x9      ; x0 = (x0 + x1 - 1) & -x1
 ret

; Desired AArch64 output: 3 instructions before ret.
; NOTE(review): presumably x0 = base and x1 = align (first two integer arguments) — confirm.
tgt:
        sub     x8, x1, #1      ; x8 = mask = x1 - 1
        add     x9, x0, x8      ; x9 = sum = x0 + mask
        bic     x0, x9, x8      ; x0 = sum & ~mask (BIC Xd, Xn, Xm computes Xn & ~Xm)
        ret

; x86-64 + bmi:
; Current x86-64 output: 4 instructions before ret.
; NOTE(review): presumably rdi = base and rsi = align (first two integer arguments) — confirm.
src:
        lea rax, [rdi + rsi]    ; rax = rdi + rsi
        dec     rax             ; rax = rdi + rsi - 1
        neg     rsi             ; rsi = -rsi
        and rax, rsi            ; rax = (rdi + rsi - 1) & -rsi (original rsi)
        ret

; Desired x86-64 + BMI output: 3 instructions before ret.
; NOTE(review): presumably rdi = base and rsi = align (first two integer arguments) — confirm.
tgt:
        dec     rsi                     ; rsi = mask = rsi - 1
        lea     rdi, [rdi + rsi]        ; rdi = sum = rdi + mask
        andn    rax, rsi, rdi           ; rax = ~rsi & rdi = sum & ~mask (ANDN inverts its first source)
        ret

; riscv + zbb:
; Current RISC-V output: 4 instructions before ret.
; NOTE(review): presumably a0 = base and a1 = align (first two integer arguments) — confirm.
src:
 add     a0, a0, a1      ; a0 = a0 + a1
        addi    a0, a0, -1      ; a0 = a0 + a1 - 1
        neg     a1, a1          ; a1 = -a1
        and     a0, a0, a1      ; a0 = (a0 + a1 - 1) & -a1 (original values)
        ret

; Desired RISC-V + Zbb output: 3 instructions before ret.
; NOTE(review): presumably a0 = base and a1 = align (first two integer arguments) — confirm.
tgt:
        addi    t1, a1, -1      ; t1 = mask = a1 - 1
        add     t2, a0, t1      ; t2 = sum = a0 + mask
        andn    a0, t2, t1      ; a0 = sum & ~mask (Zbb ANDN rd, rs1, rs2 computes rs1 & ~rs2)
        ret
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to