Issue 160348
Summary [AVX-512] Replace static `vpermi2w` with `vpermi2b` on Intel
Labels new issue
Assignees
Reporter Validark
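    On many Intel targets, `vpermi2w` has higher latency than `vpermi2b`, so we should prefer `vpermi2b` wherever the shuffle mask is a compile-time constant and can be re-expressed at byte granularity. The example below shows the same situation for the single-source forms: `vpermw` is emitted where `vpermb` would do.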

```llvm
; Rearranges the 64 input bytes in adjacent pairs (i.e. at 16-bit granularity).
; Viewing the input as 32 words, output word j*8+i is input word i*4+j, so each
; mask row below gathers every fourth word starting at word 0, 1, 2 and 3.
define dso_local <64 x i8> @foo(<64 x i8> %bytes) local_unnamed_addr {
Entry:
  %regrouped = shufflevector <64 x i8> %bytes, <64 x i8> poison, <64 x i32> <
    ; words 0, 4, 8, ..., 28
    i32 0, i32 1, i32 8, i32 9, i32 16, i32 17, i32 24, i32 25, i32 32, i32 33, i32 40, i32 41, i32 48, i32 49, i32 56, i32 57,
    ; words 1, 5, 9, ..., 29
    i32 2, i32 3, i32 10, i32 11, i32 18, i32 19, i32 26, i32 27, i32 34, i32 35, i32 42, i32 43, i32 50, i32 51, i32 58, i32 59,
    ; words 2, 6, 10, ..., 30
    i32 4, i32 5, i32 12, i32 13, i32 20, i32 21, i32 28, i32 29, i32 36, i32 37, i32 44, i32 45, i32 52, i32 53, i32 60, i32 61,
    ; words 3, 7, 11, ..., 31
    i32 6, i32 7, i32 14, i32 15, i32 22, i32 23, i32 30, i32 31, i32 38, i32 39, i32 46, i32 47, i32 54, i32 55, i32 62, i32 63
  >
  ret <64 x i8> %regrouped
}
```

Compiles to:

```asm
# Word-index table for `foo`: 32 one-byte entries that are widened to 16-bit
# indices at load time. Each row supplies the source-word indices for eight
# consecutive output words.
.LCPI0_1:
        .byte   0, 4, 8, 12, 16, 20, 24, 28
        .byte   1, 5, 9, 13, 17, 21, 25, 29
        .byte   2, 6, 10, 14, 18, 22, 26, 30
        .byte   3, 7, 11, 15, 19, 23, 27, 31

# foo: permute the 32 words of zmm0 according to .LCPI0_1.
#   In/Out: zmm0
#   Clobb:  zmm1
foo:
        vpmovsxbw       zmm1, ymmword ptr [rip + .LCPI0_1]      # sign-extend 32 byte indices to 32 word indices
        vpermw          zmm0, zmm1, zmm0                        # zmm0.word[i] = zmm0.word[zmm1.word[i]]
        ret
```

Should be:

```asm
# Byte-index table for the permute below: 64 one-byte entries, one per output
# byte, used directly as vpermb indices (no widening step needed). Each row
# supplies the source-byte indices for sixteen consecutive output bytes.
#
# The table is loaded with vmovdqa64, which requires its 64-byte memory
# operand to be 64-byte aligned, so the alignment is stated explicitly here.
        .p2align        6
.LCPI0_0:
        .byte   0, 1, 8, 9, 16, 17, 24, 25, 32, 33, 40, 41, 48, 49, 56, 57
        .byte   2, 3, 10, 11, 18, 19, 26, 27, 34, 35, 42, 43, 50, 51, 58, 59
        .byte   4, 5, 12, 13, 20, 21, 28, 29, 36, 37, 44, 45, 52, 53, 60, 61
        .byte   6, 7, 14, 15, 22, 23, 30, 31, 38, 39, 46, 47, 54, 55, 62, 63

# sh: permute the 64 bytes of zmm0 according to .LCPI0_0.
#   In/Out: zmm0
#   Clobb:  zmm1
# NOTE(review): this listing is labelled `sh`, while the IR function and the
# current-output listing are named `foo` -- presumably the same function;
# confirm against the reporter's original.
sh:
        vmovdqa64       zmm1, zmmword ptr [rip + .LCPI0_0]      # aligned 64-byte load of the index table
        vpermb          zmm0, zmm1, zmm0                        # zmm0.byte[i] = zmm0.byte[zmm1.byte[i]]
        ret
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to