Issue |
136516
|
Summary |
[AArch64] `clz` on u8/u16 unnecessarily zero-extended to u32
|
Labels |
new issue
|
Assignees |
|
Reporter |
Kmeakin
|
The result of the `clz` instruction on an 8-bit or 16-bit value is unnecessarily zero-extended to 32 bits via `and`.
# C++ example
https://godbolt.org/z/rc9P4a54a
```c++
#include <cstdint>
using u8 = uint8_t;
using u16 = uint16_t;
using u32 = uint32_t;
using u64 = uint64_t;
using u128 = __uint128_t;
auto src8(u8 x) { return __builtin_clzg(x); }
auto src16(u16 x) { return __builtin_clzg(x); }
```
# Clang assembly
```asm
src8(unsigned char):
and w8, w0, #0xff
clz w8, w8
sub w8, w8, #24
and w0, w8, #0xff
ret
src16(unsigned short):
and w8, w0, #0xffff
clz w8, w8
sub w8, w8, #16
and w0, w8, #0xffff
ret
```
# GCC assembly
```asm
src8(unsigned char):
and w0, w0, 255
clz w0, w0
sub w0, w0, #24
ret
src16(unsigned short):
and w0, w0, 65535
clz w0, w0
sub w0, w0, #16
ret
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs