Issue 147358
Summary [AVX-512] Missed comparison->discrete-masked-OR fold
Labels new issue
Assignees
Reporter Validark
    [Zig Godbolt](https://zig.godbo.lt/#g:!((g:!((g:!((h:codeEditor,i:(filename:'1',fontScale:14,fontUsePx:'0',j:1,lang:zig,selection:(endColumn:22,endLineNumber:1,positionColumn:22,positionLineNumber:1,selectionStartColumn:22,selectionStartLineNumber:1,startColumn:22,startLineNumber:1),source:'export+fn+lower_simd(x:+@Vector(64,+u8))+@Vector(64,+u8)+%7B%0A++++return+@select(%0A++++++++u8,%0A++++++++@as(@Vector(64,+bool),+@bitCast(@intFromBool(@as(@Vector(64,+u8),+@splat(!'A!'))+%3C%3D+x)+%26%0A++++++++++++@intFromBool(@as(@Vector(64,+u8),+@splat(!'Z!'))+%3E%3D+x))),%0A++++++++x+%7C+@as(@Vector(64,+u8),+@splat(0x20)),%0A++++++++x,%0A++++)%3B%0A%7D'),l:'5',n:'0',o:'Zig+source+%231',t:'0')),k:61.31071190951432,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:z0141,filters:(b:'0',binary:'1',binaryObject:'1',commentOnly:'0',debugCalls:'1',demangle:'0',directives:'0',execute:'1',intel:'0',libraryCode:'0',trim:'1',verboseDemangling:'0'),flagsViewOpen:'1',fontScale:14,fontUsePx:'0',j:1,lang:zig,libs:!(),options:'-mcpu%3Dznver5+-target+x86_64-linux+-OReleaseFast',overrides:!(),selection:(endColumn:1,endLineNumber:1,positionColumn:1,positionLineNumber:1,selectionStartColumn:1,selectionStartLineNumber:1,startColumn:1,startLineNumber:1),source:1),l:'5',n:'0',o:'+zig+0.14.1+(Editor+%231)',t:'0')),k:38.6892880904857,l:'4',m:100,n:'0',o:'',s:0,t:'0')),l:'2',n:'0',o:'',t:'0')),version:4)

[LLVM Godbolt](https://llvm.godbo.lt/#z:OYLghAFBqd5QCxAYwPYBMCmBRdBLAF1QCcAaPECAMzwBtMA7AQwFtMQByARg9KtQYEAysib0QXACx8BBAKoBnTAAUAHpwAMvAFYTStJg1C1aANxakl9ZATwDKjdAGFUtAK4sGexwBk8DTAA5DwAjTGIQAE5SAAdUBUI7Bhd3Tz04hNsBPwDgljCI6KtMGyShAiZiAhSPLy5LTGsshnLKghyg0PCoywqqmrT6hT72/0787siASktUN2Jkdg4sGgCAanQFVAB9WlRRWjWAUgBmJwA2STXVNbwADlPsNYABPYB3cO2ElnQIU4urjd7o9jgAmACsGimaz2B22bgYzDY6G2THQ6GIxwA7AAhI4aACC2EExAAniB8QS1mDwVxjicACJrNHoekA663B4nJ5HCEaUhs%2B5rAC053BAqFovFnJFYold1l0slcplUvlivVatVKuVSoVWt1mp1%2BuNGu1erNhvNRotBpNttNdst9ptrut7qtnpdHu9Xud/qdgcdwYdobdfqDYZ9AZD4d98ZjUYjsfNj0p1N54NB9KZeGQLBiazctAIbMuHOB3Jp9UFCtB53V9cbDZlTdbLaFbc7HbrPbWXd7zaH7eH3dHg5Hk7HU4n07ns4X/b7A6X49XM/X883i5Xu%2BX%2B7Xe8PB43R9PJ63Z8vF53N%2B3S7ThIzEJOObWJDLgM5IMz/NraxOUF1UA4CgJlEDwLAoUIOgqCFRg%2BC4IApCEOQ0D0MgjDYKwxCcLQzCCOwwjcOI/CiPIkiKLIyiaOoujUIYlCmLwxiWOY0jWI49iqM4njuNo3jEMfKkaSuU4mWKGxPwrLgfwhMD/nLIEuR5F8BUUr9K1UyF0zWYhMFLDSKxU0TKSOLEGUpDgZloThwV4LwOC0UhUE4HwfAANQAWTWABJAAlNYtnmRYwROHhSAITRrJmABrEBIX0ThJAc6KXM4XgFBAfkoqc6zSDgWAkDQAs6HCchKBKmIyoiEwbGIBFYr4OgCHCLKIBCNKQn8SpyW4XhuuYMkAHkQm0Epcoikq2EEYaGFoPrnKwEI3GAJwxFoLL%2BtILAWEMYBxDyna8H00pTEwLbnMwVQSjcVrOAi/xWtso7aDwEJiF6lwsDSghiDwFgHt4c7iBCeJMAZTA9qMN6jGimYqAMYAFA8vBMDeYaYkYIGZEEEQxHYKRcfkJQ1DS3R6gMOGTHMfR3qyyAZlQGJmi24VhtfYUWGQGI3HEgAvBgQfslyQf%2BrAGYgGZJOaBwGGcVxam8eWOjyAp0niRIBAGOpYk15pVa6CIhkaCayhGHW9Bl822kNiZjd6NpLaGEY7fVrhpbmBZCZsuzUqO1yOBhWhkDWCA/sa6EIFwQgP15cKpl4XKtCmOKEv5F6UtIRznMDzLssi%2BHfY4UF/dzjLC7y1PSBBhJ7EkIA%3D%3D%3D)

Idea:

```zig
// Lowercases the ASCII uppercase letters in a 64-byte vector.
// For each byte lane: if the byte is in 'A'..'Z' (inclusive), set bit 0x20 to
// make it lowercase; otherwise leave the byte exactly as it was.
export fn lower_simd(x: @Vector(64, u8)) @Vector(64, u8) {
    return @select(
 u8,
        // Per-lane predicate: ('A' <= x) and ('Z' >= x). Each comparison result is
        // converted with @intFromBool so the two can be combined with `&`, and the
        // result is bit-cast back to a vector of bool to serve as the select mask.
        @as(@Vector(64, bool), @bitCast(@intFromBool(@as(@Vector(64, u8), @splat('A')) <= x) &
            @intFromBool(@as(@Vector(64, u8), @splat('Z')) >= x))),
        // Lanes where the predicate holds: x with bit 0x20 set.
        x | @as(@Vector(64, u8), @splat(0x20)),
        // All other lanes: x unchanged.
 x,
    );
}
```

LLVM optimizes to:

```llvm
define dso_local <64 x i8> @lower_simd(<64 x i8> %0, <64 x i8> %1) local_unnamed_addr {
Entry:
 %2 = add <64 x i8> %1, <i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65, i8 -65>
  %3 = icmp ult <64 x i8> %2, <i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26, i8 26>
  %4 = or <64 x i8> %1, <i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32>
  %5 = select <64 x i1> %3, <64 x i8> %4, <64 x i8> %1
  ret <64 x i8> %5
}
```

Which is emitted as:

```asm
# Constant pool: 64 bytes of 191 (== -65 == -'A' modulo 256).
.LCPI0_0:
 .zero   64,191
# Constant pool: 64 bytes of 26 (number of letters in 'A'..'Z').
.LCPI0_1:
        .zero   64,26
# Constant pool: 4 bytes of 32 (0x20), broadcast as a dword below.
.LCPI0_3:
 .zero   4,32
lower_simd:                             # @lower_simd
 # zmm0 = zmm1 - 'A' per byte.
 vpaddb  zmm0, zmm1, zmmword ptr [rip + .LCPI0_0]
        # zmm2 = zmm1 | 0x20 in every byte (unconditional; dword constant broadcast to 16 lanes).
        vpord   zmm2, zmm1, dword ptr [rip + .LCPI0_3]{1to16}
        # k1 = byte lanes where (zmm1 - 'A') <u 26, i.e. the input byte is in 'A'..'Z'.
        vpcmpltub       k1, zmm0, zmmword ptr [rip + .LCPI0_1]
        # zmm0 = per byte: zmm2 where k1 is set, zmm1 otherwise.
        vpblendmb       zmm0 {k1}, zmm1, zmm2
 ret
```

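Should be (a single merge-masked `vpaddb` replaces the `vpord` + `vpblendmb` pair; the selected lanes are in 'A'..'Z' and therefore have bit 0x20 clear, so adding 0x20 is equivalent to OR-ing it in):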

```asm
# Constant pool: 64 bytes of 191 (== -65 == -'A' modulo 256).
.LCPI0_0:
        .zero 64,191
# Constant pool: 64 bytes of 26 (number of letters in 'A'..'Z').
.LCPI0_1:
        .zero   64,26
# Constant pool: 64 bytes of 32 (0x20), the ASCII case bit.
.LCPI0_2:
        .zero 64,32
# NOTE(review): this listing takes the input in zmm0, whereas the emitted
# listing reads it from zmm1 -- presumably hand-written; confirm register choice.
lower_simd:                             # @lower_simd
        # zmm1 = zmm0 - 'A' per byte.
        vpaddb zmm1, zmm0, zmmword ptr [rip + .LCPI0_0]
        # k1 = byte lanes where (zmm0 - 'A') <u 26, i.e. the input byte is in 'A'..'Z'.
        vpcmpltub       k1, zmm1, zmmword ptr [rip + .LCPI0_1]
        # Merge-masked add: only lanes selected by k1 get +0x20; other lanes of zmm0 are kept.
        vpaddb  zmm0 {k1}, zmm0, zmmword ptr [rip + .LCPI0_2]
        ret
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to