Issue 136516
Summary [AArch64] `clz` on u8/u16 unnecessarily zero-extended to u32
Labels new issue
Assignees
Reporter Kmeakin
    The result of the `clz` instruction on an 8-bit or 16-bit value is unnecessarily zero-extended to 32 bits via `and`.

# C++ example
https://godbolt.org/z/rc9P4a54a
```c++
#include <cstdint>

using u8 = uint8_t;
using u16 = uint16_t;
using u32 = uint32_t;
using u64 = uint64_t;
using u128 = __uint128_t;

auto src8(u8 x) { return __builtin_clzg(x); }
auto src16(u16 x) { return __builtin_clzg(x); }
```

# Clang assembly
```asm
src8(unsigned char):
        and     w8, w0, #0xff
        clz     w8, w8
        sub     w8, w8, #24
        and w0, w8, #0xff
        ret

src16(unsigned short):
        and     w8, w0, #0xffff
        clz     w8, w8
        sub     w8, w8, #16
        and w0, w8, #0xffff
        ret
```

# GCC assembly
```asm
src8(unsigned char):
        and     w0, w0, 255
 clz     w0, w0
        sub     w0, w0, #24
        ret
src16(unsigned short):
        and     w0, w0, 65535
        clz     w0, w0
        sub w0, w0, #16
        ret
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to