Issue 147280
Summary x86-64 LOCK op, ptr, CONST generation is inconsistent and constant-dependent
Labels new issue
Assignees
Reporter orlp
    Consider these two nearly identical functions:

```rust
use std::sync::atomic::*;

#[inline(never)]
pub fn foo(x: &AtomicU64) -> bool {
    let old = x.fetch_and(!2, Ordering::Relaxed);
    let new = old & !2;
    new != 0
}

#[inline(never)]
pub fn bar(x: &AtomicU64) -> bool {
    let old = x.fetch_and(!1, Ordering::Relaxed);
    let new = old & !1;
    new != 0
}
```

One is optimized well to a `LOCK` instruction + flag test, the other to a CAS loop:

```asm
example::foo::hcccd03d7e323547c:
        lock            and     qword ptr [rdi], -3
        setne   al
        ret

example::bar::hd8685fcc36071aa3:
        mov     rax, qword ptr [rdi]
.LBB1_1:
        mov     rcx, rax
        and     rcx, -2
        lock            cmpxchg qword ptr [rdi], rcx
        jne     .LBB1_1
        cmp     rax, 2
        setae   al
        ret
```

The generated LLVM IR is:

```llvm
define noundef zeroext i1 @example::foo::hcccd03d7e323547c(ptr noundef nonnull align 8 %x) unnamed_addr {
start:
  %0 = atomicrmw and ptr %x, i64 -3 monotonic, align 8
  %new = and i64 %0, -3
  %_0 = icmp ne i64 %new, 0
  ret i1 %_0
}
```

(and similarly for `bar`, just with the constant adjusted to `-2`)

---

In general, when an atomic arithmetic RMW operation is followed by a test that could be answered using the zero flag, sign flag or overflow flag, I would expect x86-64 code generation to emit the `LOCK`-prefixed instruction and then inspect the appropriate flag, rather than falling back to a CAS loop.
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to