Issue 127244
Summary SLP vectorizer performance regression caused by 88e7b8b81c061113399637f936937ffaf5a9bc08, #125725
Labels new issue
Assignees
Reporter pclove1
    The SLP vectorizer change in commit 88e7b8b81c061113399637f936937ffaf5a9bc08 (#125725) introduced a performance regression.

# A minimal reproducible example (LLVM IR):

```
target datalayout = "e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"

; Reproducer: a <4 x i8> value is split into its four lanes before the loop,
; each lane is carried through its own scalar i8 phi, and the lanes are then
; reassembled into a <4 x i8> inside the loop.
define ptx_kernel void @test() {
  ; Source vector: an i32 constant reinterpreted as four i8 lanes.
  %vec = bitcast i32 0 to <4 x i8>

  ; Extract every lane of %vec as a scalar i8.
  %elem0 = extractelement <4 x i8> %vec, i64 0
  %elem1 = extractelement <4 x i8> %vec, i64 1
  %elem2 = extractelement <4 x i8> %vec, i64 2
  %elem3 = extractelement <4 x i8> %vec, i64 3

  br label %1

1: ; preds = %1, %0
  ; One scalar phi per lane: the extracted lane on entry from %0,
  ; the constant 0 on the back edge from %1.
  %.p0 = phi i8 [ %elem0, %0 ], [ 0, %1 ]
  %.p1 = phi i8 [ %elem1, %0 ], [ 0, %1 ]
  %.p2 = phi i8 [ %elem2, %0 ], [ 0, %1 ]
  %.p3 = phi i8 [ %elem3, %0 ], [ 0, %1 ]

  ; Rebuild a <4 x i8> from the four phis; lane i is filled from %.p<i>.
  %val0 = insertelement <4 x i8> poison, i8 %.p0, i64 0
  %val1 = insertelement <4 x i8> %val0, i8 %.p1, i64 1
  %val2 = insertelement <4 x i8> %val1, i8 %.p2, i64 2
  %val3 = insertelement <4 x i8> %val2, i8 %.p3, i64 3

  ; Reinterpret the rebuilt vector as an i32.
  %val = bitcast <4 x i8> %val3 to i32

  ; Block %1 branches unconditionally back to itself.
  br label %1
}
```

# SLP vectorizer behavior before the culprit:
```
target datalayout = "e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"

; Output before the culprit commit: the per-lane extractelement / scalar phi /
; insertelement chains from the input are gone, replaced by one vector phi
; that takes %vec directly.
define ptx_kernel void @test() {
  %vec = bitcast i32 0 to <4 x i8>
  br label %1

1: ; preds = %1, %0
  ; Single <4 x i8> phi: %vec on entry from %0, the all-zero vector on the
  ; back edge from %1.
  %2 = phi <4 x i8> [ %vec, %0 ], [ zeroinitializer, %1 ]
  %val = bitcast <4 x i8> %2 to i32
  br label %1
}
```
https://godbolt.org/z/o1orj4GWh

# SLP vectorizer behavior after the culprit:
```
target datalayout = "e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"

; Output after the culprit commit: the scalar per-lane form is kept as-is --
; four extractelements, four i8 phis and four insertelements -- and no
; <4 x i8> phi is formed.
define ptx_kernel void @test() {
  %vec = bitcast i32 0 to <4 x i8>
  ; The lane extractions are still present.
  %elem0 = extractelement <4 x i8> %vec, i64 0
  %elem1 = extractelement <4 x i8> %vec, i64 1
 %elem2 = extractelement <4 x i8> %vec, i64 2
  %elem3 = extractelement <4 x i8> %vec, i64 3
  br label %1

1: ; preds = %1, %0
  ; Still one scalar i8 phi per lane.
  %.p0 = phi i8 [ %elem0, %0 ], [ 0, %1 ]
  %.p1 = phi i8 [ %elem1, %0 ], [ 0, %1 ]
  %.p2 = phi i8 [ %elem2, %0 ], [ 0, %1 ]
 %.p3 = phi i8 [ %elem3, %0 ], [ 0, %1 ]
  ; The vector is still rebuilt lane by lane from the scalar phis.
  %val0 = insertelement <4 x i8> poison, i8 %.p0, i64 0
  %val1 = insertelement <4 x i8> %val0, i8 %.p1, i64 1
  %val2 = insertelement <4 x i8> %val1, i8 %.p2, i64 2
  %val3 = insertelement <4 x i8> %val2, i8 %.p3, i64 3
  %val = bitcast <4 x i8> %val3 to i32
  br label %1
}
```

https://godbolt.org/z/ha797ePhW

Out of curiosity, I also tried reproducing this at a more recent commit, 7ec60bf0166519317b5ae2505dd6ed4660e3ea39, and the performance regression is still present.

**Credit** to @metaflow for finding the culprit.
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to