Author: Pengcheng Wang
Date: 2025-07-11T18:59:23+08:00
New Revision: 43535be8ab3f6ffadd161358823d90c713c9d7be

URL: https://github.com/llvm/llvm-project/commit/43535be8ab3f6ffadd161358823d90c713c9d7be
DIFF: https://github.com/llvm/llvm-project/commit/43535be8ab3f6ffadd161358823d90c713c9d7be.diff

LOG: Revert "[RISCV] AddEdge between mask producer and user of V0 (#146855)"

This reverts commit aee21c368b41cd5f7765a31b9dbe77f2bffadd4e.

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
    llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
    llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
    llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
    llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp b/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
index 5464612d86bee..be54a8c95a978 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
@@ -10,10 +10,6 @@
 // instructions and masked instructions, so that we can reduce the live range
 // overlaps of mask registers.
 //
-// If there are multiple masks producers followed by multiple masked
-// instructions, then at each masked instructions add dependency edges between
-// every producer and masked instruction.
-//
 // The reason why we need to do this:
 // 1. When tracking register pressure, we don't track physical registers.
 // 2. We have a RegisterClass for mask register (which is `VMV0`), but we don't
@@ -72,25 +68,11 @@ class RISCVVectorMaskDAGMutation : public ScheduleDAGMutation {
 
   void apply(ScheduleDAGInstrs *DAG) override {
     SUnit *NearestUseV0SU = nullptr;
-    SmallVector<SUnit *, 2> DefMask;
     for (SUnit &SU : DAG->SUnits) {
       const MachineInstr *MI = SU.getInstr();
-      if (isSoleUseCopyToV0(SU))
-        DefMask.push_back(&SU);
-
-      if (MI->findRegisterUseOperand(RISCV::V0, TRI)) {
+      if (MI->findRegisterUseOperand(RISCV::V0, TRI))
         NearestUseV0SU = &SU;
 
-        // Copy may not be a real use, so skip it here.
-        if (DefMask.size() > 1 && !MI->isCopy())
-          for (SUnit *Def : DefMask)
-            if (DAG->canAddEdge(Def, &SU))
-              DAG->addEdge(Def, SDep(&SU, SDep::Artificial));
-
-        if (!DefMask.empty())
-          DefMask.erase(DefMask.begin());
-      }
-
       if (NearestUseV0SU && NearestUseV0SU != &SU && isSoleUseCopyToV0(SU) &&
           // For LMUL=8 cases, there will be more possibilities to spill.
           // FIXME: We should use RegPressureTracker to do fine-grained

diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
index 2d4fce68f9545..0d8aff306252e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
@@ -313,12 +313,12 @@ define i32 @test_nxv128i1(<vscale x 128 x i1> %x) {
 ; CHECK-NEXT:    vslidedown.vx v0, v6, a0
 ; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v6, v7, a1
-; CHECK-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v0, v7, a0
 ; CHECK-NEXT:    vslidedown.vx v5, v6, a0
+; CHECK-NEXT:    vslidedown.vx v4, v7, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vadd.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v5
 ; CHECK-NEXT:    vadd.vi v16, v16, 1, v0.t
@@ -425,15 +425,13 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
 ; CHECK-NEXT:    vmerge.vim v16, v8, 1, v0
 ; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v0, v5, a1
-; CHECK-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v0, v6, a1
+; CHECK-NEXT:    vslidedown.vx v5, v7, a1
+; CHECK-NEXT:    vslidedown.vx v4, v6, a1
 ; CHECK-NEXT:    vsetvli a2, zero, e32, m8, ta, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    vmv1r.v v0, v4
 ; CHECK-NEXT:    vadd.vi v8, v8, 1, v0.t
-; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v0, v7, a1
-; CHECK-NEXT:    vsetvli a2, zero, e32, m8, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v5
 ; CHECK-NEXT:    vadd.vi v16, v16, 1, v0.t
 ; CHECK-NEXT:    vadd.vv v8, v16, v8
 ; CHECK-NEXT:    addi a2, sp, 16

diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
index 15417da962bd3..796f8dde58f47 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
@@ -139,20 +139,21 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT:    slli a3, a3, 4
 ; RV32-NEXT:    sub sp, sp, a3
 ; RV32-NEXT:    andi sp, sp, -64
+; RV32-NEXT:    addi a3, sp, 64
 ; RV32-NEXT:    vl8r.v v8, (a0)
 ; RV32-NEXT:    slli a2, a2, 3
 ; RV32-NEXT:    add a0, a0, a2
-; RV32-NEXT:    vl8r.v v16, (a0)
+; RV32-NEXT:    vl8r.v v24, (a0)
 ; RV32-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
 ; RV32-NEXT:    vmseq.vi v0, v8, 0
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vmerge.vim v24, v8, 1, v0
-; RV32-NEXT:    vmseq.vi v0, v16, 0
-; RV32-NEXT:    addi a0, sp, 64
-; RV32-NEXT:    add a1, a0, a1
-; RV32-NEXT:    add a2, a0, a2
-; RV32-NEXT:    vs8r.v v24, (a0)
-; RV32-NEXT:    vmerge.vim v8, v8, 1, v0
+; RV32-NEXT:    vmv.v.i v16, 0
+; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    add a2, a3, a2
+; RV32-NEXT:    vmseq.vi v8, v24, 0
+; RV32-NEXT:    vmerge.vim v24, v16, 1, v0
+; RV32-NEXT:    vs8r.v v24, (a3)
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vmerge.vim v8, v16, 1, v0
 ; RV32-NEXT:    vs8r.v v8, (a2)
 ; RV32-NEXT:    lbu a0, 0(a1)
 ; RV32-NEXT:    addi sp, s0, -80
@@ -178,20 +179,21 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT:    slli a3, a3, 4
 ; RV64-NEXT:    sub sp, sp, a3
 ; RV64-NEXT:    andi sp, sp, -64
+; RV64-NEXT:    addi a3, sp, 64
 ; RV64-NEXT:    vl8r.v v8, (a0)
 ; RV64-NEXT:    slli a2, a2, 3
 ; RV64-NEXT:    add a0, a0, a2
-; RV64-NEXT:    vl8r.v v16, (a0)
+; RV64-NEXT:    vl8r.v v24, (a0)
 ; RV64-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
 ; RV64-NEXT:    vmseq.vi v0, v8, 0
-; RV64-NEXT:    vmv.v.i v8, 0
-; RV64-NEXT:    vmerge.vim v24, v8, 1, v0
-; RV64-NEXT:    vmseq.vi v0, v16, 0
-; RV64-NEXT:    addi a0, sp, 64
-; RV64-NEXT:    add a1, a0, a1
-; RV64-NEXT:    add a2, a0, a2
-; RV64-NEXT:    vs8r.v v24, (a0)
-; RV64-NEXT:    vmerge.vim v8, v8, 1, v0
+; RV64-NEXT:    vmv.v.i v16, 0
+; RV64-NEXT:    add a1, a3, a1
+; RV64-NEXT:    add a2, a3, a2
+; RV64-NEXT:    vmseq.vi v8, v24, 0
+; RV64-NEXT:    vmerge.vim v24, v16, 1, v0
+; RV64-NEXT:    vs8r.v v24, (a3)
+; RV64-NEXT:    vmv1r.v v0, v8
+; RV64-NEXT:    vmerge.vim v8, v16, 1, v0
 ; RV64-NEXT:    vs8r.v v8, (a2)
 ; RV64-NEXT:    lbu a0, 0(a1)
 ; RV64-NEXT:    addi sp, s0, -80

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
index fb070b24a4f34..2587411566a3f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
@@ -324,23 +324,24 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT:    sw s0, 376(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    addi s0, sp, 384
 ; RV32-NEXT:    andi sp, sp, -128
-; RV32-NEXT:    li a2, 128
-; RV32-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT:    zext.b a1, a1
+; RV32-NEXT:    mv a2, sp
+; RV32-NEXT:    li a3, 128
+; RV32-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; RV32-NEXT:    vle8.v v8, (a0)
 ; RV32-NEXT:    addi a0, a0, 128
 ; RV32-NEXT:    vle8.v v16, (a0)
+; RV32-NEXT:    add a1, a2, a1
 ; RV32-NEXT:    vmseq.vi v0, v8, 0
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vmerge.vim v24, v8, 1, v0
-; RV32-NEXT:    vmseq.vi v0, v16, 0
-; RV32-NEXT:    zext.b a0, a1
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    vse8.v v24, (a1)
-; RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; RV32-NEXT:    addi a1, sp, 128
-; RV32-NEXT:    vse8.v v8, (a1)
-; RV32-NEXT:    lbu a0, 0(a0)
+; RV32-NEXT:    vmv.v.i v24, 0
+; RV32-NEXT:    vmseq.vi v8, v16, 0
+; RV32-NEXT:    vmerge.vim v16, v24, 1, v0
+; RV32-NEXT:    vse8.v v16, (a2)
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vmerge.vim v8, v24, 1, v0
+; RV32-NEXT:    addi a0, sp, 128
+; RV32-NEXT:    vse8.v v8, (a0)
+; RV32-NEXT:    lbu a0, 0(a1)
 ; RV32-NEXT:    addi sp, s0, -384
 ; RV32-NEXT:    lw ra, 380(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s0, 376(sp) # 4-byte Folded Reload
@@ -354,23 +355,24 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT:    sd s0, 368(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    addi s0, sp, 384
 ; RV64-NEXT:    andi sp, sp, -128
-; RV64-NEXT:    li a2, 128
-; RV64-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT:    zext.b a1, a1
+; RV64-NEXT:    mv a2, sp
+; RV64-NEXT:    li a3, 128
+; RV64-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; RV64-NEXT:    vle8.v v8, (a0)
 ; RV64-NEXT:    addi a0, a0, 128
 ; RV64-NEXT:    vle8.v v16, (a0)
+; RV64-NEXT:    add a1, a2, a1
 ; RV64-NEXT:    vmseq.vi v0, v8, 0
-; RV64-NEXT:    vmv.v.i v8, 0
-; RV64-NEXT:    vmerge.vim v24, v8, 1, v0
-; RV64-NEXT:    vmseq.vi v0, v16, 0
-; RV64-NEXT:    zext.b a0, a1
-; RV64-NEXT:    mv a1, sp
-; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    vse8.v v24, (a1)
-; RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; RV64-NEXT:    addi a1, sp, 128
-; RV64-NEXT:    vse8.v v8, (a1)
-; RV64-NEXT:    lbu a0, 0(a0)
+; RV64-NEXT:    vmv.v.i v24, 0
+; RV64-NEXT:    vmseq.vi v8, v16, 0
+; RV64-NEXT:    vmerge.vim v16, v24, 1, v0
+; RV64-NEXT:    vse8.v v16, (a2)
+; RV64-NEXT:    vmv1r.v v0, v8
+; RV64-NEXT:    vmerge.vim v8, v24, 1, v0
+; RV64-NEXT:    addi a0, sp, 128
+; RV64-NEXT:    vse8.v v8, (a0)
+; RV64-NEXT:    lbu a0, 0(a1)
 ; RV64-NEXT:    addi sp, s0, -384
 ; RV64-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
@@ -384,23 +386,24 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
 ; RV32ZBS-NEXT:    sw s0, 376(sp) # 4-byte Folded Spill
 ; RV32ZBS-NEXT:    addi s0, sp, 384
 ; RV32ZBS-NEXT:    andi sp, sp, -128
-; RV32ZBS-NEXT:    li a2, 128
-; RV32ZBS-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; RV32ZBS-NEXT:    zext.b a1, a1
+; RV32ZBS-NEXT:    mv a2, sp
+; RV32ZBS-NEXT:    li a3, 128
+; RV32ZBS-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; RV32ZBS-NEXT:    vle8.v v8, (a0)
 ; RV32ZBS-NEXT:    addi a0, a0, 128
 ; RV32ZBS-NEXT:    vle8.v v16, (a0)
+; RV32ZBS-NEXT:    add a1, a2, a1
 ; RV32ZBS-NEXT:    vmseq.vi v0, v8, 0
-; RV32ZBS-NEXT:    vmv.v.i v8, 0
-; RV32ZBS-NEXT:    vmerge.vim v24, v8, 1, v0
-; RV32ZBS-NEXT:    vmseq.vi v0, v16, 0
-; RV32ZBS-NEXT:    zext.b a0, a1
-; RV32ZBS-NEXT:    mv a1, sp
-; RV32ZBS-NEXT:    add a0, a1, a0
-; RV32ZBS-NEXT:    vse8.v v24, (a1)
-; RV32ZBS-NEXT:    vmerge.vim v8, v8, 1, v0
-; RV32ZBS-NEXT:    addi a1, sp, 128
-; RV32ZBS-NEXT:    vse8.v v8, (a1)
-; RV32ZBS-NEXT:    lbu a0, 0(a0)
+; RV32ZBS-NEXT:    vmv.v.i v24, 0
+; RV32ZBS-NEXT:    vmseq.vi v8, v16, 0
+; RV32ZBS-NEXT:    vmerge.vim v16, v24, 1, v0
+; RV32ZBS-NEXT:    vse8.v v16, (a2)
+; RV32ZBS-NEXT:    vmv1r.v v0, v8
+; RV32ZBS-NEXT:    vmerge.vim v8, v24, 1, v0
+; RV32ZBS-NEXT:    addi a0, sp, 128
+; RV32ZBS-NEXT:    vse8.v v8, (a0)
+; RV32ZBS-NEXT:    lbu a0, 0(a1)
 ; RV32ZBS-NEXT:    addi sp, s0, -384
 ; RV32ZBS-NEXT:    lw ra, 380(sp) # 4-byte Folded Reload
 ; RV32ZBS-NEXT:    lw s0, 376(sp) # 4-byte Folded Reload
@@ -414,23 +417,24 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
 ; RV64ZBS-NEXT:    sd s0, 368(sp) # 8-byte Folded Spill
 ; RV64ZBS-NEXT:    addi s0, sp, 384
 ; RV64ZBS-NEXT:    andi sp, sp, -128
-; RV64ZBS-NEXT:    li a2, 128
-; RV64ZBS-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; RV64ZBS-NEXT:    zext.b a1, a1
+; RV64ZBS-NEXT:    mv a2, sp
+; RV64ZBS-NEXT:    li a3, 128
+; RV64ZBS-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
 ; RV64ZBS-NEXT:    vle8.v v8, (a0)
 ; RV64ZBS-NEXT:    addi a0, a0, 128
 ; RV64ZBS-NEXT:    vle8.v v16, (a0)
+; RV64ZBS-NEXT:    add a1, a2, a1
 ; RV64ZBS-NEXT:    vmseq.vi v0, v8, 0
-; RV64ZBS-NEXT:    vmv.v.i v8, 0
-; RV64ZBS-NEXT:    vmerge.vim v24, v8, 1, v0
-; RV64ZBS-NEXT:    vmseq.vi v0, v16, 0
-; RV64ZBS-NEXT:    zext.b a0, a1
-; RV64ZBS-NEXT:    mv a1, sp
-; RV64ZBS-NEXT:    add a0, a1, a0
-; RV64ZBS-NEXT:    vse8.v v24, (a1)
-; RV64ZBS-NEXT:    vmerge.vim v8, v8, 1, v0
-; RV64ZBS-NEXT:    addi a1, sp, 128
-; RV64ZBS-NEXT:    vse8.v v8, (a1)
-; RV64ZBS-NEXT:    lbu a0, 0(a0)
+; RV64ZBS-NEXT:    vmv.v.i v24, 0
+; RV64ZBS-NEXT:    vmseq.vi v8, v16, 0
+; RV64ZBS-NEXT:    vmerge.vim v16, v24, 1, v0
+; RV64ZBS-NEXT:    vse8.v v16, (a2)
+; RV64ZBS-NEXT:    vmv1r.v v0, v8
+; RV64ZBS-NEXT:    vmerge.vim v8, v24, 1, v0
+; RV64ZBS-NEXT:    addi a0, sp, 128
+; RV64ZBS-NEXT:    vse8.v v8, (a0)
+; RV64ZBS-NEXT:    lbu a0, 0(a1)
 ; RV64ZBS-NEXT:    addi sp, s0, -384
 ; RV64ZBS-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
 ; RV64ZBS-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
index 67584ba8a82cf..c11319ff335fd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
@@ -143,15 +143,16 @@ define void @deinterleave6_0_i8(ptr %in, ptr %out) {
 ; CHECK-LABEL: deinterleave6_0_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v9, (a0)
 ; CHECK-NEXT:    vmv.v.i v0, 2
+; CHECK-NEXT:    vmv.v.i v8, 4
 ; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v9, v8, 8
+; CHECK-NEXT:    vslidedown.vi v10, v9, 8
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
-; CHECK-NEXT:    vmv.v.i v0, 4
-; CHECK-NEXT:    vrgather.vi v8, v9, 4, v0.t
-; CHECK-NEXT:    vse8.v v8, (a1)
+; CHECK-NEXT:    vslidedown.vi v9, v9, 5, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vrgather.vi v9, v10, 4, v0.t
+; CHECK-NEXT:    vse8.v v9, (a1)
 ; CHECK-NEXT:    ret
 entry:
   %0 = load <16 x i8>, ptr %in, align 1
@@ -187,15 +188,16 @@ define void @deinterleave7_0_i8(ptr %in, ptr %out) {
 ; CHECK-LABEL: deinterleave7_0_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v9, (a0)
 ; CHECK-NEXT:    vmv.v.i v0, 2
+; CHECK-NEXT:    vmv.v.i v8, 4
 ; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v9, v8, 8
+; CHECK-NEXT:    vslidedown.vi v10, v9, 8
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vslidedown.vi v8, v8, 6, v0.t
-; CHECK-NEXT:    vmv.v.i v0, 4
-; CHECK-NEXT:    vrgather.vi v8, v9, 6, v0.t
-; CHECK-NEXT:    vse8.v v8, (a1)
+; CHECK-NEXT:    vslidedown.vi v9, v9, 6, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vrgather.vi v9, v10, 6, v0.t
+; CHECK-NEXT:    vse8.v v9, (a1)
 ; CHECK-NEXT:    ret
 entry:
   %0 = load <16 x i8>, ptr %in, align 1

diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
index ad2ed47e67e64..206838917d004 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
@@ -153,19 +153,20 @@ define <vscale x 2 x i32> @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y,
 ; NO_FOLDING:       # %bb.0:
 ; NO_FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
 ; NO_FOLDING-NEXT:    vlm.v v8, (a0)
-; NO_FOLDING-NEXT:    vmv.v.i v10, 0
+; NO_FOLDING-NEXT:    vlm.v v9, (a1)
+; NO_FOLDING-NEXT:    vlm.v v10, (a2)
+; NO_FOLDING-NEXT:    vmv.v.i v11, 0
 ; NO_FOLDING-NEXT:    vmv.v.v v0, v8
-; NO_FOLDING-NEXT:    vmerge.vim v11, v10, -1, v0
-; NO_FOLDING-NEXT:    vlm.v v0, (a1)
-; NO_FOLDING-NEXT:    vlm.v v9, (a2)
-; NO_FOLDING-NEXT:    vmerge.vim v12, v10, -1, v0
+; NO_FOLDING-NEXT:    vmerge.vim v12, v11, -1, v0
 ; NO_FOLDING-NEXT:    vmv.v.v v0, v9
-; NO_FOLDING-NEXT:    vmerge.vim v9, v10, -1, v0
-; NO_FOLDING-NEXT:    vmul.vv v10, v11, v12
-; NO_FOLDING-NEXT:    vsub.vv v11, v11, v9
+; NO_FOLDING-NEXT:    vmerge.vim v9, v11, -1, v0
+; NO_FOLDING-NEXT:    vmv.v.v v0, v10
+; NO_FOLDING-NEXT:    vmerge.vim v10, v11, -1, v0
+; NO_FOLDING-NEXT:    vmul.vv v9, v12, v9
+; NO_FOLDING-NEXT:    vsub.vv v11, v12, v10
 ; NO_FOLDING-NEXT:    vmv.v.v v0, v8
-; NO_FOLDING-NEXT:    vadd.vi v9, v9, -1, v0.t
-; NO_FOLDING-NEXT:    vor.vv v8, v10, v9
+; NO_FOLDING-NEXT:    vadd.vi v10, v10, -1, v0.t
+; NO_FOLDING-NEXT:    vor.vv v8, v9, v10
 ; NO_FOLDING-NEXT:    vor.vv v8, v8, v11
 ; NO_FOLDING-NEXT:    ret
 ;
@@ -173,19 +174,20 @@ define <vscale x 2 x i32> @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y,
 ; FOLDING:       # %bb.0:
 ; FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
 ; FOLDING-NEXT:    vlm.v v8, (a0)
-; FOLDING-NEXT:    vmv.v.i v10, 0
+; FOLDING-NEXT:    vlm.v v9, (a1)
+; FOLDING-NEXT:    vlm.v v10, (a2)
+; FOLDING-NEXT:    vmv.v.i v11, 0
 ; FOLDING-NEXT:    vmv.v.v v0, v8
-; FOLDING-NEXT:    vmerge.vim v11, v10, -1, v0
-; FOLDING-NEXT:    vlm.v v0, (a1)
-; FOLDING-NEXT:    vlm.v v9, (a2)
-; FOLDING-NEXT:    vmerge.vim v12, v10, -1, v0
+; FOLDING-NEXT:    vmerge.vim v12, v11, -1, v0
 ; FOLDING-NEXT:    vmv.v.v v0, v9
-; FOLDING-NEXT:    vmerge.vim v9, v10, -1, v0
-; FOLDING-NEXT:    vmul.vv v10, v11, v12
-; FOLDING-NEXT:    vsub.vv v11, v11, v9
+; FOLDING-NEXT:    vmerge.vim v9, v11, -1, v0
+; FOLDING-NEXT:    vmv.v.v v0, v10
+; FOLDING-NEXT:    vmerge.vim v10, v11, -1, v0
+; FOLDING-NEXT:    vmul.vv v9, v12, v9
+; FOLDING-NEXT:    vsub.vv v11, v12, v10
 ; FOLDING-NEXT:    vmv.v.v v0, v8
-; FOLDING-NEXT:    vadd.vi v9, v9, -1, v0.t
-; FOLDING-NEXT:    vor.vv v8, v10, v9
+; FOLDING-NEXT:    vadd.vi v10, v10, -1, v0.t
+; FOLDING-NEXT:    vor.vv v8, v9, v10
 ; FOLDING-NEXT:    vor.vv v8, v8, v11
 ; FOLDING-NEXT:    ret
   %a = load <vscale x 2 x i1>, ptr %x
@@ -207,19 +209,20 @@ define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p
 ; NO_FOLDING:       # %bb.0:
 ; NO_FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
 ; NO_FOLDING-NEXT:    vlm.v v8, (a0)
-; NO_FOLDING-NEXT:    vmv.v.i v10, 0
+; NO_FOLDING-NEXT:    vlm.v v9, (a1)
+; NO_FOLDING-NEXT:    vlm.v v10, (a2)
+; NO_FOLDING-NEXT:    vmv.v.i v11, 0
 ; NO_FOLDING-NEXT:    vmv1r.v v0, v8
-; NO_FOLDING-NEXT:    vmerge.vim v11, v10, -1, v0
-; NO_FOLDING-NEXT:    vlm.v v0, (a1)
-; NO_FOLDING-NEXT:    vlm.v v9, (a2)
-; NO_FOLDING-NEXT:    vmerge.vim v12, v10, -1, v0
+; NO_FOLDING-NEXT:    vmerge.vim v12, v11, -1, v0
 ; NO_FOLDING-NEXT:    vmv1r.v v0, v9
-; NO_FOLDING-NEXT:    vmerge.vim v9, v10, -1, v0
-; NO_FOLDING-NEXT:    vmul.vv v10, v11, v12
-; NO_FOLDING-NEXT:    vsub.vv v11, v11, v9
+; NO_FOLDING-NEXT:    vmerge.vim v9, v11, -1, v0
+; NO_FOLDING-NEXT:    vmv1r.v v0, v10
+; NO_FOLDING-NEXT:    vmerge.vim v10, v11, -1, v0
+; NO_FOLDING-NEXT:    vmul.vv v9, v12, v9
+; NO_FOLDING-NEXT:    vsub.vv v11, v12, v10
 ; NO_FOLDING-NEXT:    vmv1r.v v0, v8
-; NO_FOLDING-NEXT:    vadd.vi v9, v9, -1, v0.t
-; NO_FOLDING-NEXT:    vor.vv v8, v10, v9
+; NO_FOLDING-NEXT:    vadd.vi v10, v10, -1, v0.t
+; NO_FOLDING-NEXT:    vor.vv v8, v9, v10
 ; NO_FOLDING-NEXT:    vor.vv v8, v8, v11
 ; NO_FOLDING-NEXT:    ret
 ;
@@ -227,19 +230,20 @@ define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p
 ; FOLDING:       # %bb.0:
 ; FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
 ; FOLDING-NEXT:    vlm.v v8, (a0)
-; FOLDING-NEXT:    vmv.v.i v10, 0
+; FOLDING-NEXT:    vlm.v v9, (a1)
+; FOLDING-NEXT:    vlm.v v10, (a2)
+; FOLDING-NEXT:    vmv.v.i v11, 0
 ; FOLDING-NEXT:    vmv1r.v v0, v8
-; FOLDING-NEXT:    vmerge.vim v11, v10, -1, v0
-; FOLDING-NEXT:    vlm.v v0, (a1)
-; FOLDING-NEXT:    vlm.v v9, (a2)
-; FOLDING-NEXT:    vmerge.vim v12, v10, -1, v0
+; FOLDING-NEXT:    vmerge.vim v12, v11, -1, v0
 ; FOLDING-NEXT:    vmv1r.v v0, v9
-; FOLDING-NEXT:    vmerge.vim v9, v10, -1, v0
-; FOLDING-NEXT:    vmul.vv v10, v11, v12
-; FOLDING-NEXT:    vsub.vv v11, v11, v9
+; FOLDING-NEXT:    vmerge.vim v9, v11, -1, v0
+; FOLDING-NEXT:    vmv1r.v v0, v10
+; FOLDING-NEXT:    vmerge.vim v10, v11, -1, v0
+; FOLDING-NEXT:    vmul.vv v9, v12, v9
+; FOLDING-NEXT:    vsub.vv v11, v12, v10
 ; FOLDING-NEXT:    vmv1r.v v0, v8
-; FOLDING-NEXT:    vadd.vi v9, v9, -1, v0.t
-; FOLDING-NEXT:    vor.vv v8, v10, v9
+; FOLDING-NEXT:    vadd.vi v10, v10, -1, v0.t
+; FOLDING-NEXT:    vor.vv v8, v9, v10
 ; FOLDING-NEXT:    vor.vv v8, v8, v11
 ; FOLDING-NEXT:    ret
   %a = load <vscale x 2 x i1>, ptr %x
@@ -440,14 +444,16 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y,
 ; NO_FOLDING:       # %bb.0:
 ; NO_FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
 ; NO_FOLDING-NEXT:    vlm.v v0, (a0)
-; NO_FOLDING-NEXT:    vmv.v.i v8, 0
-; NO_FOLDING-NEXT:    vmerge.vim v9, v8, 1, v0
-; NO_FOLDING-NEXT:    vlm.v v0, (a2)
-; NO_FOLDING-NEXT:    vmerge.vim v8, v8, 1, v0
-; NO_FOLDING-NEXT:    vlm.v v0, (a1)
-; NO_FOLDING-NEXT:    vadd.vv v10, v9, v8
-; NO_FOLDING-NEXT:    vsub.vv v8, v9, v8
-; NO_FOLDING-NEXT:    vor.vv v10, v10, v9, v0.t
+; NO_FOLDING-NEXT:    vlm.v v8, (a2)
+; NO_FOLDING-NEXT:    vlm.v v9, (a1)
+; NO_FOLDING-NEXT:    vmv.v.i v10, 0
+; NO_FOLDING-NEXT:    vmerge.vim v11, v10, 1, v0
+; NO_FOLDING-NEXT:    vmv.v.v v0, v8
+; NO_FOLDING-NEXT:    vmerge.vim v8, v10, 1, v0
+; NO_FOLDING-NEXT:    vadd.vv v10, v11, v8
+; NO_FOLDING-NEXT:    vsub.vv v8, v11, v8
+; NO_FOLDING-NEXT:    vmv.v.v v0, v9
+; NO_FOLDING-NEXT:    vor.vv v10, v10, v11, v0.t
 ; NO_FOLDING-NEXT:    vor.vv v8, v10, v8
 ; NO_FOLDING-NEXT:    ret
 ;
@@ -455,14 +461,16 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y,
 ; FOLDING:       # %bb.0:
 ; FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
 ; FOLDING-NEXT:    vlm.v v0, (a0)
-; FOLDING-NEXT:    vmv.v.i v8, 0
-; FOLDING-NEXT:    vmerge.vim v9, v8, 1, v0
-; FOLDING-NEXT:    vlm.v v0, (a2)
-; FOLDING-NEXT:    vmerge.vim v8, v8, 1, v0
-; FOLDING-NEXT:    vlm.v v0, (a1)
-; FOLDING-NEXT:    vadd.vv v10, v9, v8
-; FOLDING-NEXT:    vsub.vv v8, v9, v8
-; FOLDING-NEXT:    vor.vv v10, v10, v9, v0.t
+; FOLDING-NEXT:    vlm.v v8, (a2)
+; FOLDING-NEXT:    vlm.v v9, (a1)
+; FOLDING-NEXT:    vmv.v.i v10, 0
+; FOLDING-NEXT:    vmerge.vim v11, v10, 1, v0
+; FOLDING-NEXT:    vmv.v.v v0, v8
+; FOLDING-NEXT:    vmerge.vim v8, v10, 1, v0
+; FOLDING-NEXT:    vadd.vv v10, v11, v8
+; FOLDING-NEXT:    vsub.vv v8, v11, v8
+; FOLDING-NEXT:    vmv.v.v v0, v9
+; FOLDING-NEXT:    vor.vv v10, v10, v11, v0.t
 ; FOLDING-NEXT:    vor.vv v8, v10, v8
 ; FOLDING-NEXT:    ret
   %a = load <vscale x 2 x i1>, ptr %x
@@ -484,14 +492,16 @@ define <vscale x 2 x i8> @vwop_vscale_zext_i1i8_multiple_users(ptr %x, ptr %y, p
 ; NO_FOLDING:       # %bb.0:
 ; NO_FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
 ; NO_FOLDING-NEXT:    vlm.v v0, (a0)
-; NO_FOLDING-NEXT:    vmv.v.i v8, 0
-; NO_FOLDING-NEXT:    vmerge.vim v9, v8, 1, v0
-; NO_FOLDING-NEXT:    vlm.v v0, (a2)
-; NO_FOLDING-NEXT:    vmerge.vim v8, v8, 1, v0
-; NO_FOLDING-NEXT:    vlm.v v0, (a1)
-; NO_FOLDING-NEXT:    vadd.vv v10, v9, v8
-; NO_FOLDING-NEXT:    vsub.vv v8, v9, v8
-; NO_FOLDING-NEXT:    vor.vv v10, v10, v9, v0.t
+; NO_FOLDING-NEXT:    vlm.v v8, (a2)
+; NO_FOLDING-NEXT:    vlm.v v9, (a1)
+; NO_FOLDING-NEXT:    vmv.v.i v10, 0
+; NO_FOLDING-NEXT:    vmerge.vim v11, v10, 1, v0
+; NO_FOLDING-NEXT:    vmv1r.v v0, v8
+; NO_FOLDING-NEXT:    vmerge.vim v8, v10, 1, v0
+; NO_FOLDING-NEXT:    vadd.vv v10, v11, v8
+; NO_FOLDING-NEXT:    vsub.vv v8, v11, v8
+; NO_FOLDING-NEXT:    vmv1r.v v0, v9
+; NO_FOLDING-NEXT:    vor.vv v10, v10, v11, v0.t
 ; NO_FOLDING-NEXT:    vor.vv v8, v10, v8
 ; NO_FOLDING-NEXT:    ret
 ;
@@ -499,14 +509,16 @@ define <vscale x 2 x i8> @vwop_vscale_zext_i1i8_multiple_users(ptr %x, ptr %y, p
 ; FOLDING:       # %bb.0:
 ; FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
 ; FOLDING-NEXT:    vlm.v v0, (a0)
-; FOLDING-NEXT:    vmv.v.i v8, 0
-; FOLDING-NEXT:    vmerge.vim v9, v8, 1, v0
-; FOLDING-NEXT:    vlm.v v0, (a2)
-; FOLDING-NEXT:    vmerge.vim v8, v8, 1, v0
-; FOLDING-NEXT:    vlm.v v0, (a1)
-; FOLDING-NEXT:    vadd.vv v10, v9, v8
-; FOLDING-NEXT:    vsub.vv v8, v9, v8
-; FOLDING-NEXT:    vor.vv v10, v10, v9, v0.t
+; FOLDING-NEXT:    vlm.v v8, (a2)
+; FOLDING-NEXT:    vlm.v v9, (a1)
+; FOLDING-NEXT:    vmv.v.i v10, 0
+; FOLDING-NEXT:    vmerge.vim v11, v10, 1, v0
+; FOLDING-NEXT:    vmv1r.v v0, v8
+; FOLDING-NEXT:    vmerge.vim v8, v10, 1, v0
+; FOLDING-NEXT:    vadd.vv v10, v11, v8
+; FOLDING-NEXT:    vsub.vv v8, v11, v8
+; FOLDING-NEXT:    vmv1r.v v0, v9
+; FOLDING-NEXT:    vor.vv v10, v10, v11, v0.t
 ; FOLDING-NEXT:    vor.vv v8, v10, v8
 ; FOLDING-NEXT:    ret
   %a = load <vscale x 2 x i1>, ptr %x

diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
index 30044ad580143..9cdec6a9ff2e9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
@@ -494,17 +494,17 @@ define <vscale x 8 x double> @vfmerge_nzv_nxv8f64(<vscale x 8 x double> %va, <vs
 define <vscale x 16 x double> @vselect_combine_regression(<vscale x 16 x i64> %va, <vscale x 16 x double> %vb) {
 ; CHECK-LABEL: vselect_combine_regression:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vmv8r.v v24, v16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vmseq.vi v24, v16, 0
 ; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vmv.v.i v16, 0
+; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    add a1, a0, a1
 ; CHECK-NEXT:    vle64.v v8, (a0), v0.t
-; CHECK-NEXT:    vmseq.vi v0, v24, 0
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    vle64.v v16, (a0), v0.t
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vle64.v v16, (a1), v0.t
 ; CHECK-NEXT:    ret
   %cond = icmp eq <vscale x 16 x i64> %va, zeroinitializer
   %sel = select <vscale x 16 x i1> %cond, <vscale x 16 x double> %vb, <vscale x 16 x double> zeroinitializer

