Issue 123727
Summary [X86][AVX512] Generate `valignq` instead of `vpermi2pd`
Labels question, backend:X86
Assignees abhishek-kaushik22
Reporter abhishek-kaushik22
    Consider this IR:

```llvm
; Reproducer: loads two adjacent 8 x double vectors from %arr starting at
; element %x, builds a vector that is the first load shifted down by one
; element with the first element of the second load appended, stores that
; back over the first vector, and returns the second load.
; %val1 and %val2 are not used in the body.
define <8 x double> @bar(i64 %x, ptr %arr, <8 x double> %val1, <8 x double> %val2) {
entry:
    ; %gepload1 = arr[x .. x+7]
    %gep1 = getelementptr double, ptr %arr, i64 %x
    %gepload1 = load <8 x double>, ptr %gep1, align 16
    ; %gepload3 = arr[x+8 .. x+15], i.e. the 8 doubles directly after %gepload1
    %z = add i64 %x, 8
    %gep3 = getelementptr double, ptr %arr, i64 %z
 %gepload3 = load <8 x double>, ptr %gep3, align 16
    ; Mask indices 0-7 address %gepload1 and 8-15 address %gepload3, so
    ; <1..8> selects %gepload1[1..7] followed by %gepload3[0].
    %shuffle1 = shufflevector <8 x double> %gepload1, <8 x double> %gepload3, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
    ; The shuffled vector overwrites arr[x .. x+7].
    store <8 x double> %shuffle1, ptr %gep1, align 16
    ret <8 x double> %gepload3
}
```

Currently, we generate 
```asm
# Constant-pool entry: the eight shuffle indices 1..8, stored one byte each.
.LCPI0_1:
 .byte   1                               # 0x1
        .byte   2 # 0x2
        .byte   3                               # 0x3
        .byte   4                               # 0x4
        .byte   5 # 0x5
        .byte   6 # 0x6
        .byte   7                               # 0x7
 .byte   8                               # 0x8
# Current codegen for @bar (Intel syntax). The addresses [rsi + 8*rdi] and
# [rsi + 8*rdi + 64] correspond to the IR's &arr[x] and &arr[x+8].
bar: # @bar
        # zmm1 = first 8 doubles, zmm0 = the following 8 doubles (64 bytes later).
        vmovupd zmm1, zmmword ptr [rsi + 8*rdi]
 vmovupd zmm0, zmmword ptr [rsi + 8*rdi + 64]
        # Load the 8 index bytes and sign-extend each to a qword index in zmm2.
        vpmovsxbq       zmm2, qword ptr [rip + .LCPI0_1] # zmm2 = [1,2,3,4,5,6,7,8]
        # Two-source permute: zmm2 holds the indices and receives the result.
        # Indices 1..7 pick zmm1[1..7]; index 8 picks zmm0[0].
        vpermi2pd zmm2, zmm1, zmm0
        # Store the permuted vector back over the first 8 doubles.
        vmovupd zmmword ptr [rsi + 8*rdi], zmm2
        # zmm0 is unmodified and still holds the second load when we return.
 ret
```

But couldn't we generate a single `valignq` instead of the full two-source permute?
```asm
# Proposed codegen for @bar (Intel syntax): replace the index load plus
# vpermi2pd with a single valignq, so no constant-pool entry is needed.
bar:                                    # @bar
        vmovupd zmm1, zmmword ptr [rsi + 8*rdi]         # zmm1 = first 8 doubles (low part)
        vmovupd zmm0, zmmword ptr [rsi + 8*rdi + 64]    # zmm0 = next 8 doubles (high part)
        # valignq dst, src1, src2, imm computes low512((src1:src2) >> imm*64),
        # with src1 in the HIGH half of the concatenation. The second load
        # (zmm0) must therefore be src1 and the first load (zmm1) src2.
        valignq zmm2, zmm0, zmm1, 1                     # zmm2 = zmm1[1..7], zmm0[0]
        vmovupd zmmword ptr [rsi + 8*rdi], zmm2         # overwrite the first 8 doubles
        ret                                             # zmm0 still holds the second load

```
This would avoid having to store the index pattern `.LCPI0_1` in the binary.

_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to