| Issue |
172609
|
| Summary |
[SLP][RISCV] Unable to finish vectorization with ExtractElement cost = 8
|
| Labels |
vectorizers
|
| Assignees |
alexey-bataev
|
| Reporter |
arcbbb
|
With the following change to the ExtractElement base cost:
```
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index aedd7f124cef..e47d1ed2b877 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2460,7 +2460,7 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
// In RVV, we could use vslidedown + vmv.x.s to extract element from vector
// and vslideup + vmv.s.x to insert element to vector.
- unsigned BaseCost = 1;
+ unsigned BaseCost = 8;
// When insertelement we should add the index with 1 as the input of vslideup.
unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
```
running `opt -p slp-vectorizer -S -slp-threshold=-19 -mattr=+v -mtriple riscv32` on the test below does not finish:
```
; Reproducer for the SLP vectorizer run described in this report.
;
; @foo takes two pointers and runs a single loop. Each loop iteration performs
; 32 float swaps: it loads one float through the "first" pointer and one
; through the "last" pointer, stores each value to the other location, then
; advances "first" by +4 bytes and moves "last" by -4 bytes. The loop repeats
; while the advanced "first" pointer is unsigned-less-than the "last" pointer.
;
; NOTE(review): the value names (__first / __last / incdec.ptr) suggest this
; was reduced from an unrolled reverse-style loop in library code; the report
; does not confirm that.
; NOTE(review): keep the instructions below exactly as they are -- this is a
; reproducer, and changing value names or instruction order may affect
; whether the problem still reproduces.
define void @foo(ptr %__last.addr.011.i.i, ptr %call3) {
newFuncRoot:
br label %while.body.i.i
while.body.i.i:
; Loop-carried pointers: "last" starts at the first argument and "first" at
; the second; both are updated from the final GEPs (.31) of the iteration.
%__last.addr.014.i.i = phi ptr [ %__last.addr.011.i.i, %newFuncRoot ], [ %__last.addr.0.i.i.31, %while.body.i.i ]
%__first.addr.013.i.i = phi ptr [ %call3, %newFuncRoot ], [ %incdec.ptr2.i.i.31, %while.body.i.i ]
; Swap #1: exchange the floats at the current "first" and "last" pointers.
%0 = load float, ptr %__first.addr.013.i.i, align 4
%1 = load float, ptr %__last.addr.014.i.i, align 4
store float %1, ptr %__first.addr.013.i.i, align 4
store float %0, ptr %__last.addr.014.i.i, align 4
; Step the pointers toward each other by one float (4 bytes), then repeat the
; same load/load/store/store pattern. The remaining 31 swaps below follow this
; exact shape, chained through the previous step's GEPs.
%incdec.ptr2.i.i = getelementptr inbounds nuw i8, ptr %__first.addr.013.i.i, i32 4
%__last.addr.0.i.i = getelementptr inbounds i8, ptr %__last.addr.014.i.i, i32 -4
%2 = load float, ptr %incdec.ptr2.i.i, align 4
%3 = load float, ptr %__last.addr.0.i.i, align 4
store float %3, ptr %incdec.ptr2.i.i, align 4
store float %2, ptr %__last.addr.0.i.i, align 4
%incdec.ptr2.i.i.1 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i, i32 4
%__last.addr.0.i.i.1 = getelementptr inbounds i8, ptr %__last.addr.0.i.i, i32 -4
%4 = load float, ptr %incdec.ptr2.i.i.1, align 4
%5 = load float, ptr %__last.addr.0.i.i.1, align 4
store float %5, ptr %incdec.ptr2.i.i.1, align 4
store float %4, ptr %__last.addr.0.i.i.1, align 4
%incdec.ptr2.i.i.2 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.1, i32 4
%__last.addr.0.i.i.2 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.1, i32 -4
%6 = load float, ptr %incdec.ptr2.i.i.2, align 4
%7 = load float, ptr %__last.addr.0.i.i.2, align 4
store float %7, ptr %incdec.ptr2.i.i.2, align 4
store float %6, ptr %__last.addr.0.i.i.2, align 4
%incdec.ptr2.i.i.3 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.2, i32 4
%__last.addr.0.i.i.3 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.2, i32 -4
%8 = load float, ptr %incdec.ptr2.i.i.3, align 4
%9 = load float, ptr %__last.addr.0.i.i.3, align 4
store float %9, ptr %incdec.ptr2.i.i.3, align 4
store float %8, ptr %__last.addr.0.i.i.3, align 4
%incdec.ptr2.i.i.4 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.3, i32 4
%__last.addr.0.i.i.4 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.3, i32 -4
%10 = load float, ptr %incdec.ptr2.i.i.4, align 4
%11 = load float, ptr %__last.addr.0.i.i.4, align 4
store float %11, ptr %incdec.ptr2.i.i.4, align 4
store float %10, ptr %__last.addr.0.i.i.4, align 4
%incdec.ptr2.i.i.5 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.4, i32 4
%__last.addr.0.i.i.5 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.4, i32 -4
%12 = load float, ptr %incdec.ptr2.i.i.5, align 4
%13 = load float, ptr %__last.addr.0.i.i.5, align 4
store float %13, ptr %incdec.ptr2.i.i.5, align 4
store float %12, ptr %__last.addr.0.i.i.5, align 4
%incdec.ptr2.i.i.6 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.5, i32 4
%__last.addr.0.i.i.6 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.5, i32 -4
%14 = load float, ptr %incdec.ptr2.i.i.6, align 4
%15 = load float, ptr %__last.addr.0.i.i.6, align 4
store float %15, ptr %incdec.ptr2.i.i.6, align 4
store float %14, ptr %__last.addr.0.i.i.6, align 4
%incdec.ptr2.i.i.7 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.6, i32 4
%__last.addr.0.i.i.7 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.6, i32 -4
%16 = load float, ptr %incdec.ptr2.i.i.7, align 4
%17 = load float, ptr %__last.addr.0.i.i.7, align 4
store float %17, ptr %incdec.ptr2.i.i.7, align 4
store float %16, ptr %__last.addr.0.i.i.7, align 4
%incdec.ptr2.i.i.8 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.7, i32 4
%__last.addr.0.i.i.8 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.7, i32 -4
%18 = load float, ptr %incdec.ptr2.i.i.8, align 4
%19 = load float, ptr %__last.addr.0.i.i.8, align 4
store float %19, ptr %incdec.ptr2.i.i.8, align 4
store float %18, ptr %__last.addr.0.i.i.8, align 4
%incdec.ptr2.i.i.9 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.8, i32 4
%__last.addr.0.i.i.9 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.8, i32 -4
%20 = load float, ptr %incdec.ptr2.i.i.9, align 4
%21 = load float, ptr %__last.addr.0.i.i.9, align 4
store float %21, ptr %incdec.ptr2.i.i.9, align 4
store float %20, ptr %__last.addr.0.i.i.9, align 4
%incdec.ptr2.i.i.10 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.9, i32 4
%__last.addr.0.i.i.10 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.9, i32 -4
%22 = load float, ptr %incdec.ptr2.i.i.10, align 4
%23 = load float, ptr %__last.addr.0.i.i.10, align 4
store float %23, ptr %incdec.ptr2.i.i.10, align 4
store float %22, ptr %__last.addr.0.i.i.10, align 4
%incdec.ptr2.i.i.11 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.10, i32 4
%__last.addr.0.i.i.11 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.10, i32 -4
%24 = load float, ptr %incdec.ptr2.i.i.11, align 4
%25 = load float, ptr %__last.addr.0.i.i.11, align 4
store float %25, ptr %incdec.ptr2.i.i.11, align 4
store float %24, ptr %__last.addr.0.i.i.11, align 4
%incdec.ptr2.i.i.12 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.11, i32 4
%__last.addr.0.i.i.12 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.11, i32 -4
%26 = load float, ptr %incdec.ptr2.i.i.12, align 4
%27 = load float, ptr %__last.addr.0.i.i.12, align 4
store float %27, ptr %incdec.ptr2.i.i.12, align 4
store float %26, ptr %__last.addr.0.i.i.12, align 4
%incdec.ptr2.i.i.13 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.12, i32 4
%__last.addr.0.i.i.13 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.12, i32 -4
%28 = load float, ptr %incdec.ptr2.i.i.13, align 4
%29 = load float, ptr %__last.addr.0.i.i.13, align 4
store float %29, ptr %incdec.ptr2.i.i.13, align 4
store float %28, ptr %__last.addr.0.i.i.13, align 4
%incdec.ptr2.i.i.14 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.13, i32 4
%__last.addr.0.i.i.14 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.13, i32 -4
%30 = load float, ptr %incdec.ptr2.i.i.14, align 4
%31 = load float, ptr %__last.addr.0.i.i.14, align 4
store float %31, ptr %incdec.ptr2.i.i.14, align 4
store float %30, ptr %__last.addr.0.i.i.14, align 4
%incdec.ptr2.i.i.15 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.14, i32 4
%__last.addr.0.i.i.15 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.14, i32 -4
%32 = load float, ptr %incdec.ptr2.i.i.15, align 4
%33 = load float, ptr %__last.addr.0.i.i.15, align 4
store float %33, ptr %incdec.ptr2.i.i.15, align 4
store float %32, ptr %__last.addr.0.i.i.15, align 4
%incdec.ptr2.i.i.16 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.15, i32 4
%__last.addr.0.i.i.16 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.15, i32 -4
%34 = load float, ptr %incdec.ptr2.i.i.16, align 4
%35 = load float, ptr %__last.addr.0.i.i.16, align 4
store float %35, ptr %incdec.ptr2.i.i.16, align 4
store float %34, ptr %__last.addr.0.i.i.16, align 4
%incdec.ptr2.i.i.17 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.16, i32 4
%__last.addr.0.i.i.17 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.16, i32 -4
%36 = load float, ptr %incdec.ptr2.i.i.17, align 4
%37 = load float, ptr %__last.addr.0.i.i.17, align 4
store float %37, ptr %incdec.ptr2.i.i.17, align 4
store float %36, ptr %__last.addr.0.i.i.17, align 4
%incdec.ptr2.i.i.18 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.17, i32 4
%__last.addr.0.i.i.18 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.17, i32 -4
%38 = load float, ptr %incdec.ptr2.i.i.18, align 4
%39 = load float, ptr %__last.addr.0.i.i.18, align 4
store float %39, ptr %incdec.ptr2.i.i.18, align 4
store float %38, ptr %__last.addr.0.i.i.18, align 4
%incdec.ptr2.i.i.19 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.18, i32 4
%__last.addr.0.i.i.19 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.18, i32 -4
%40 = load float, ptr %incdec.ptr2.i.i.19, align 4
%41 = load float, ptr %__last.addr.0.i.i.19, align 4
store float %41, ptr %incdec.ptr2.i.i.19, align 4
store float %40, ptr %__last.addr.0.i.i.19, align 4
%incdec.ptr2.i.i.20 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.19, i32 4
%__last.addr.0.i.i.20 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.19, i32 -4
%42 = load float, ptr %incdec.ptr2.i.i.20, align 4
%43 = load float, ptr %__last.addr.0.i.i.20, align 4
store float %43, ptr %incdec.ptr2.i.i.20, align 4
store float %42, ptr %__last.addr.0.i.i.20, align 4
%incdec.ptr2.i.i.21 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.20, i32 4
%__last.addr.0.i.i.21 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.20, i32 -4
%44 = load float, ptr %incdec.ptr2.i.i.21, align 4
%45 = load float, ptr %__last.addr.0.i.i.21, align 4
store float %45, ptr %incdec.ptr2.i.i.21, align 4
store float %44, ptr %__last.addr.0.i.i.21, align 4
%incdec.ptr2.i.i.22 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.21, i32 4
%__last.addr.0.i.i.22 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.21, i32 -4
%46 = load float, ptr %incdec.ptr2.i.i.22, align 4
%47 = load float, ptr %__last.addr.0.i.i.22, align 4
store float %47, ptr %incdec.ptr2.i.i.22, align 4
store float %46, ptr %__last.addr.0.i.i.22, align 4
%incdec.ptr2.i.i.23 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.22, i32 4
%__last.addr.0.i.i.23 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.22, i32 -4
%48 = load float, ptr %incdec.ptr2.i.i.23, align 4
%49 = load float, ptr %__last.addr.0.i.i.23, align 4
store float %49, ptr %incdec.ptr2.i.i.23, align 4
store float %48, ptr %__last.addr.0.i.i.23, align 4
%incdec.ptr2.i.i.24 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.23, i32 4
%__last.addr.0.i.i.24 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.23, i32 -4
%50 = load float, ptr %incdec.ptr2.i.i.24, align 4
%51 = load float, ptr %__last.addr.0.i.i.24, align 4
store float %51, ptr %incdec.ptr2.i.i.24, align 4
store float %50, ptr %__last.addr.0.i.i.24, align 4
%incdec.ptr2.i.i.25 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.24, i32 4
%__last.addr.0.i.i.25 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.24, i32 -4
%52 = load float, ptr %incdec.ptr2.i.i.25, align 4
%53 = load float, ptr %__last.addr.0.i.i.25, align 4
store float %53, ptr %incdec.ptr2.i.i.25, align 4
store float %52, ptr %__last.addr.0.i.i.25, align 4
%incdec.ptr2.i.i.26 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.25, i32 4
%__last.addr.0.i.i.26 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.25, i32 -4
%54 = load float, ptr %incdec.ptr2.i.i.26, align 4
%55 = load float, ptr %__last.addr.0.i.i.26, align 4
store float %55, ptr %incdec.ptr2.i.i.26, align 4
store float %54, ptr %__last.addr.0.i.i.26, align 4
%incdec.ptr2.i.i.27 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.26, i32 4
%__last.addr.0.i.i.27 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.26, i32 -4
%56 = load float, ptr %incdec.ptr2.i.i.27, align 4
%57 = load float, ptr %__last.addr.0.i.i.27, align 4
store float %57, ptr %incdec.ptr2.i.i.27, align 4
store float %56, ptr %__last.addr.0.i.i.27, align 4
%incdec.ptr2.i.i.28 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.27, i32 4
%__last.addr.0.i.i.28 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.27, i32 -4
%58 = load float, ptr %incdec.ptr2.i.i.28, align 4
%59 = load float, ptr %__last.addr.0.i.i.28, align 4
store float %59, ptr %incdec.ptr2.i.i.28, align 4
store float %58, ptr %__last.addr.0.i.i.28, align 4
%incdec.ptr2.i.i.29 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.28, i32 4
%__last.addr.0.i.i.29 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.28, i32 -4
%60 = load float, ptr %incdec.ptr2.i.i.29, align 4
%61 = load float, ptr %__last.addr.0.i.i.29, align 4
store float %61, ptr %incdec.ptr2.i.i.29, align 4
store float %60, ptr %__last.addr.0.i.i.29, align 4
%incdec.ptr2.i.i.30 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.29, i32 4
%__last.addr.0.i.i.30 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.29, i32 -4
%62 = load float, ptr %incdec.ptr2.i.i.30, align 4
%63 = load float, ptr %__last.addr.0.i.i.30, align 4
store float %63, ptr %incdec.ptr2.i.i.30, align 4
store float %62, ptr %__last.addr.0.i.i.30, align 4
; Final pointer step of the iteration: these two values feed the phis above
; and the loop-exit comparison below; no swap is performed on them here.
%incdec.ptr2.i.i.31 = getelementptr inbounds nuw i8, ptr %incdec.ptr2.i.i.30, i32 4
%__last.addr.0.i.i.31 = getelementptr inbounds i8, ptr %__last.addr.0.i.i.30, i32 -4
; Keep looping while the advanced "first" pointer is still below "last"
; (unsigned pointer comparison).
%cmp1.i.i.31 = icmp ult ptr %incdec.ptr2.i.i.31, %__last.addr.0.i.i.31
br i1 %cmp1.i.i.31, label %while.body.i.i, label %invoke.cont21.exitStub
invoke.cont21.exitStub:
ret void
}
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs