| Issue |
92211
|
| Summary |
Spurious optimization triggered by a `zext i16 %0 to i64` but not `and i64 %0, 65535`
|
| Labels |
new issue
|
| Assignees |
|
| Reporter |
Validark
|
I defined the following two Zig functions (Godbolt link: https://zig.godbolt.org/z/3xfc5bjEc):
```zig
export fn foo(x: u64) u64 {
var y: u64 = @as(u16, @truncate(x));
y = (y | (y << 24));
y = (y | (y << 12));
return y;
}
export fn bar(x: u16) u64 {
var y: u64 = x;
y = (y | (y << 24));
y = (y | (y << 12));
return y;
}
```
Emitting for Neoverse N2, I get:
```asm
foo:
and x8, x0, #0xffff
orr x8, x8, x8, lsl #24
orr x0, x8, x8, lsl #12
ret
bar:
mov w8, w0
ubfiz x10, x0, #12, #32
orr x9, x8, x8, lsl #24
orr x8, x10, x8, lsl #36
orr x0, x8, x9
ret
```
Here is the LLVM IR:
```llvm
define dso_local i64 @foo(i64 %0) local_unnamed_addr {
Entry:
%1 = and i64 %0, 65535
%2 = mul nuw nsw i64 %1, 16777217
%3 = mul nuw nsw i64 %1, 68719480832
%4 = or i64 %3, %2
ret i64 %4
}
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
define dso_local i64 @bar(i16 zeroext %0) local_unnamed_addr {
Entry:
%1 = zext i16 %0 to i64
%2 = mul nuw nsw i64 %1, 16777217
%3 = mul nuw nsw i64 %1, 68719480832
%4 = or i64 %3, %2
ret i64 %4
}
```
Here is the LLVM IR produced by Clang for "equivalent" C code:
```llvm
define dso_local range(i64 0, 4503599627370496) i64 @foo(i64 noundef %x) local_unnamed_addr {
entry:
%and = and i64 %x, 65535
%or = mul nuw nsw i64 %and, 16777217
%shl1 = mul nuw nsw i64 %and, 68719480832
%or2 = or i64 %shl1, %or
ret i64 %or2
}
define dso_local range(i64 0, 4503599627370496) i64 @bar(i16 noundef %x) local_unnamed_addr {
entry:
%conv = zext i16 %x to i64
%or = mul nuw nsw i64 %conv, 16777217
%shl1 = mul nuw nsw i64 %conv, 68719480832
%or2 = or i64 %shl1, %or
ret i64 %or2
}
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
```
And the corresponding AArch64 assembly generated from that Clang IR:
```asm
foo: // @foo
and x8, x0, #0xffff
orr x8, x8, x8, lsl #24
orr x0, x8, x8, lsl #12
ret
bar: // @bar
and x8, x0, #0xffff
orr x9, x8, x8, lsl #24
lsl x8, x8, #12
bfi x8, x0, #36, #16
orr x0, x8, x9
ret
```
On x86, compiling for Zen 4, I get:
```asm
foo:
movzx ecx, di
movabs rax, 68719480832
imul rax, rcx
mov rdx, rcx
shl rdx, 24
or rdx, rcx
or rax, rdx
ret
bar:
movabs rax, 68719480832
mov ecx, edi
mov rdx, rcx
shl rdx, 24
imul rax, rcx
or rdx, rcx
or rax, rdx
ret
```
It looks like LLVM is deciding that a multiply is less expensive than a `shl` here. Is that intended? Also, why can't we use `shlx` here? I would think we could emit something like:
```asm
shlx rax, rdi, 24
or rdi, rax
shlx rax, rdi, 12
or rax, rdi
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs