Issue |
147358
|
Summary |
[AVX-512] Missed comparison->discrete-masked-OR fold
|
Labels |
new issue
|
Assignees |
|
Reporter |
Validark
|
[Zig Godbolt](https://zig.godbo.lt/#g:!((g:!((g:!((h:codeEditor,i:(filename:'1',fontScale:14,fontUsePx:'0',j:1,lang:zig,selection:(endColumn:22,endLineNumber:1,positionColumn:22,positionLineNumber:1,selectionStartColumn:22,selectionStartLineNumber:1,startColumn:22,startLineNumber:1),source:'export+fn+lower_simd(x:+@Vector(64,+u8))+@Vector(64,+u8)+%7B%0A++++return+@select(%0A++++++++u8,%0A++++++++@as(@Vector(64,+bool),+@bitCast(@intFromBool(@as(@Vector(64,+u8),+@splat(!'A!'))+%3C%3D+x)+%26%0A++++++++++++@intFromBool(@as(@Vector(64,+u8),+@splat(!'Z!'))+%3E%3D+x))),%0A++++++++x+%7C+@as(@Vector(64,+u8),+@splat(0x20)),%0A++++++++x,%0A++++)%3B%0A%7D'),l:'5',n:'0',o:'Zig+source+%231',t:'0')),k:61.31071190951432,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:z0141,filters:(b:'0',binary:'1',binaryObject:'1',commentOnly:'0',debugCalls:'1',demangle:'0',directives:'0',execute:'1',intel:'0',libraryCode:'0',trim:'1',verboseDemangling:'0'),flagsViewOpen:'1',fontScale:14,fontUsePx:'0',j:1,lang:zig,libs:!(),options:'-mcpu%3Dznver5+-target+x86_64-linux+-OReleaseFast',overrides:!(),selection:(endColumn:1,endLineNumber:1,positionColumn:1,positionLineNumber:1,selectionStartColumn:1,selectionStartLineNumber:1,startColumn:1,startLineNumber:1),source:1),l:'5',n:'0',o:'+zig+0.14.1+(Editor+%231)',t:'0')),k:38.6892880904857,l:'4',m:100,n:'0',o:'',s:0,t:'0')),l:'2',n:'0',o:'',t:'0')),version:4)
[LLVM Godbolt](https://llvm.godbo.lt/#z:OYLghAFBqd5QCxAYwPYBMCmBRdBLAF1QCcAaPECAMzwBtMA7AQwFtMQByARg9KtQYEAysib0QXACx8BBAKoBnTAAUAHpwAMvAFYTStJg1C1aANxakl9ZATwDKjdAGFUtAK4sGexwBk8DTAA5DwAjTGIQAE5SAAdUBUI7Bhd3Tz04hNsBPwDgljCI6KtMGyShAiZiAhSPLy5LTGsshnLKghyg0PCoywqqmrT6hT72/0787siASktUN2Jkdg4sGgCAanQFVAB9WlRRWjWAUgBmJwA2STXVNbwADlPsNYABPYB3cO2ElnQIU4urjd7o9jgAmACsGimaz2B22bgYzDY6G2THQ6GIxwA7AAhI4aACC2EExAAniB8QS1mDwVxjicACJrNHoekA663B4nJ5HCEaUhs%2B5rAC053BAqFovFnJFYold1l0slcplUvlivVatVKuVSoVWt1mp1%2BuNGu1erNhvNRotBpNttNdst9ptrut7qtnpdHu9Xud/qdgcdwYdobdfqDYZ9AZD4d98ZjUYjsfNj0p1N54NB9KZeGQLBiazctAIbMuHOB3Jp9UFCtB53V9cbDZlTdbLaFbc7HbrPbWXd7zaH7eH3dHg5Hk7HU4n07ns4X/b7A6X49XM/X883i5Xu%2BX%2B7Xe8PB43R9PJ63Z8vF53N%2B3S7ThIzEJOObWJDLgM5IMz/NraxOUF1UA4CgJlEDwLAoUIOgqCFRg%2BC4IApCEOQ0D0MgjDYKwxCcLQzCCOwwjcOI/CiPIkiKLIyiaOoujUIYlCmLwxiWOY0jWI49iqM4njuNo3jEMfKkaSuU4mWKGxPwrLgfwhMD/nLIEuR5F8BUUr9K1UyF0zWYhMFLDSKxU0TKSOLEGUpDgZloThwV4LwOC0UhUE4HwfAANQAWTWABJAAlNYtnmRYwROHhSAITRrJmABrEBIX0ThJAc6KXM4XgFBAfkoqc6zSDgWAkDQAs6HCchKBKmIyoiEwbGIBFYr4OgCHCLKIBCNKQn8SpyW4XhuuYMkAHkQm0Epcoikq2EEYaGFoPrnKwEI3GAJwxFoLL%2BtILAWEMYBxDyna8H00pTEwLbnMwVQSjcVrOAi/xWtso7aDwEJiF6lwsDSghiDwFgHt4c7iBCeJMAZTA9qMN6jGimYqAMYAFA8vBMDeYaYkYIGZEEEQxHYKRcfkJQ1DS3R6gMOGTHMfR3qyyAZlQGJmi24VhtfYUWGQGI3HEgAvBgQfslyQf%2BrAGYgGZJOaBwGGcVxam8eWOjyAp0niRIBAGOpYk15pVa6CIhkaCayhGHW9Bl822kNiZjd6NpLaGEY7fVrhpbmBZCZsuzUqO1yOBhWhkDWCA/sa6EIFwQgP15cKpl4XKtCmOKEv5F6UtIRznMDzLssi%2BHfY4UF/dzjLC7y1PSBBhJ7EkIA%3D%3D%3D)
Idea (Zig source that lowercases ASCII uppercase letters in a 64-byte vector):
```zig
// Lowercases ASCII uppercase letters in a 64-byte vector.
// Each lane in the inclusive range 'A'..'Z' gets bit 0x20 set, which turns it
// into the matching lowercase letter; every other lane is returned unchanged.
export fn lower_simd(x: @Vector(64, u8)) @Vector(64, u8) {
return @select(
u8,
// Per-lane mask: ('A' <= x) AND ('Z' >= x). Both comparison results go
// through @intFromBool so they can be combined with `&`, and the result is
// bit-cast back to a bool vector for @select.
@as(@Vector(64, bool), @bitCast(@intFromBool(@as(@Vector(64, u8), @splat('A')) <= x) &
@intFromBool(@as(@Vector(64, u8), @splat('Z')) >= x))),
// Lanes where the mask is true: x with bit 0x20 set.
x | @as(@Vector(64, u8), @splat(0x20)),
// All remaining lanes: x unchanged.
x,
);
}
```
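LLVM optimizes this to the following IR (a single unsigned range check, `(x - 'A') < 26`, feeding a select between `x | 0x20` and `x`):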
```llvm
define dso_local <64 x i8> @lower_simd(<64 x i8> %0, <64 x i8> %1) local_unnamed_addr {
Entry:
%2 = add <64 x i8> %1, <i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65>
%3 = icmp ult <64 x i8> %2, <i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26>
%4 = or <64 x i8> %1, <i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32>
%5 = select <64 x i1> %3, <64 x i8> %4, <64 x i8> %1
ret <64 x i8> %5
}
```
Which is emitted as the following assembly (with `-mcpu=znver5`):
```asm
# 64 bytes of 191 (0xBF), which is -65 (-'A') as an unsigned byte.
.LCPI0_0:
.zero 64,191
# 64 bytes of 26: the number of letters in 'A'..'Z'.
.LCPI0_1:
.zero 64,26
# 4 bytes of 32 (0x20); broadcast as a dword to all 16 dword lanes below.
.LCPI0_3:
.zero 4,32
lower_simd: # @lower_simd
# zmm0 = x - 'A' per byte (the input vector x is read from zmm1 here).
vpaddb zmm0, zmm1, zmmword ptr [rip + .LCPI0_0]
# zmm2 = x | 0x20 for every byte, computed unconditionally.
vpord zmm2, zmm1, dword ptr [rip + .LCPI0_3]{1to16}
# k1 = unsigned (x - 'A') < 26, i.e. the lane holds 'A'..'Z'.
vpcmpltub k1, zmm0, zmmword ptr [rip + .LCPI0_1]
# zmm0 = k1 ? zmm2 : zmm1 per byte; this separate blend is what the issue
# asks to fold away.
vpblendmb zmm0 {k1}, zmm1, zmm2
ret
```
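Should be (the OR and the blend folded into one merge-masked instruction; AVX-512 only offers masked OR at dword/qword granularity, but a byte-masked `vpaddb` of 32 is equivalent here because bit 0x20 is never set in 'A'..'Z'):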
```asm
# 64 bytes of 191 (0xBF), which is -65 (-'A') as an unsigned byte.
.LCPI0_0:
.zero 64,191
# 64 bytes of 26: the number of letters in 'A'..'Z'.
.LCPI0_1:
.zero 64,26
# 64 bytes of 32 (0x20): the distance from an uppercase letter to its
# lowercase form.
.LCPI0_2:
.zero 64,32
lower_simd: # @lower_simd
# NOTE(review): this listing reads the input from zmm0, while the emitted
# listing reads it from zmm1 -- confirm which register actually holds x.
# zmm1 = x - 'A' per byte.
vpaddb zmm1, zmm0, zmmword ptr [rip + .LCPI0_0]
# k1 = unsigned (x - 'A') < 26, i.e. the lane holds 'A'..'Z'.
vpcmpltub k1, zmm1, zmmword ptr [rip + .LCPI0_1]
# Merge-masked add: lanes selected by k1 become x + 32, all other lanes keep
# x. For 'A'..'Z' (0x41..0x5A) bit 0x20 is clear, so x + 32 == x | 0x20.
vpaddb zmm0 {k1}, zmm0, zmmword ptr [rip + .LCPI0_2]
ret
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs