The following plugs another hole where we cache a failed SLP build
attempt with an all-success 'matches' array.  It also adds an assertion
(under flag_checking) that we never cache a failure in that state.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-06-21  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/101121
        * tree-vect-slp.c (vect_build_slp_tree_2): Do not fail fatally
        when we just lack a stmt with the desired op when doing permutation.
        (vect_build_slp_tree): When caching a failed SLP build attempt
        assert that at least one lane is marked as not matching.

        * gfortran.dg/pr101121.f: New testcase.
---
 gcc/testsuite/gfortran.dg/pr101121.f | 203 +++++++++++++++++++++++++++
 gcc/tree-vect-slp.c                  |  18 ++-
 2 files changed, 218 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/pr101121.f

diff --git a/gcc/testsuite/gfortran.dg/pr101121.f b/gcc/testsuite/gfortran.dg/pr101121.f
new file mode 100644
index 00000000000..b623ac10794
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr101121.f
@@ -0,0 +1,203 @@
+! { dg-do compile }
+! { dg-options "-Ofast -std=legacy" }
+! { dg-additional-options "-march=haswell" { target x86_64-*-* i?86-*-* } }
+      COMMON /JMSG80/ PI4,PIF,P120,R12,P340,R34,FCS(4,3),E34MAX,
+     7                IJSAME,KLSAME,IKSMJL
+      DIMENSION P1(3),FQ(0:5),F1(0:4),F2(0:4),WS(8),WP(8)
+      DIMENSION VEA(12),VES(9),WES(6)
+      DIMENSION T(0:20),U(0:20)
+      DIMENSION T3R(6,3,3,3),T9B(0:20,3,3,3)
+      DIMENSION F5X(0:12,3,3,3),F6X(0: 6,3,3,3,3)
+      DIMENSION A31(0:3,0:3),C31(2,0:3),A32(0:3,0:3),C32(2,0:3)
+      DIMENSION A41(0:3,0:3),C41(2,0:3),A42(0:3,0:3),C42(2,0:3)
+      DIMENSION A33(16),FIJ00(2),A43(16),FI0K0(2)
+      DIMENSION SEJJK0(  3),A54(16,  3),C54(2,  3)
+      DIMENSION A56(0:22,3,0:3),C56(2,0:3)
+      DIMENSION A60(0:3,0:3),C60(2,0:3),A61(0:3,0:3),C61(2,0:3)
+      DIMENSION A62(16),FI00L(2),A63(16),F0J0L(2)
+      DIMENSION A64(0:3,0:3),C64(2,  3),A65(0:3,0:3),C65(2,  3)
+      DIMENSION A69(0:3,  3),C69(2,0:3),A70(0:3,  3),C70(2,0:3)
+      DIMENSION A71(18,  3),C71(2,  3)
+      DIMENSION A72(18,  3),C72(2,  3)
+      DIMENSION A73(18,0:3),C73(2,0:3)
+      DIMENSION SE0LKL(  3),A75(16,3),C75(2,0:3)
+      DIMENSION SE0JLL(  3),A76(16,3),C76(2,0:3)
+      DIMENSION A77(0:25,3,0:3),C77(2,0:3),A78(0:31,3,0:3),C78(2,0:3)
+      DIMENSION A79(0:31,3,0:3),C79(2,0:3)
+      DIMENSION A80(0: 2,2),A81(0:24,3),A82(0:31,2),A83(0:22,2)
+      DIMENSION A84(0:13,2),A85(0:13,2),A86(0: 6)
+      DIMENSION S4(0:14),Q4(0:4),FIJKL(2)
+                  IF(XVA.LT.CUG) THEN
+                  ENDIF
+                     F1(M)= FQ0*TMP
+                     F2(M)= FQ0*TMP
+                  XX1=-X12*X43
+      IF(JI.EQ.1) THEN
+            DO 255 J=1,3
+  255    CONTINUE
+         DO 268 K=1,3
+            SEJJK00= E0+E(2,2,K,0)+E(3,3,K,0)
+            A54( 5,K)= A540
+  268    CONTINUE
+  297       F5X(3+M,I,I,I)=-R3(M,I,I,I)
+            DO 299 J=1,3
+                     F5X(3+M,I,I,J)=-R3(M,J,I,I)
+  299    CONTINUE
+         DO 300 L=0,M56
+            DO 300 M=1,3
+  300    A56(N,M,L)= ZER
+               A60(2,L)= A600+P34(I,3)*E(I,0,0,L)
+               A61(0,L)= A610+D1I     *E(L,0,0,I)
+               A61(1,L)= A610+P12(I,3)*E(L,0,0,I)
+         SEL00L= E(1,0,0,1)+E(2,0,0,2)+E(3,0,0,3)
+               IF(I.NE.J) THEN
+                  K=6-I-J
+                  F6X(0,J,I,I,I)= ZER
+                  F6X(0,I,J,I,I)= ZER
+                  F6X(0,I,I,J,I)= ZER
+                  F6X(0,I,I,I,J)= ZER
+                     F6X(M,I,I,K,J)= R2(M,K,J)
+               ENDIF
+  391       A82( M,N)= ZER
+  392       A83( M,N)= ZER
+               A84(M,N)= ZER
+               A85(M,N)= ZER
+  397    A86( M)= ZER
+         DO 399 K=1,3
+            DO 399 J=1,3
+                  DO 398 M=1,6
+                     T9B(M+ 2,I,J,K)= T3R0
+                     T9B(M+ 8,I,J,K)= T1R(M,I,J,K)
+                     T9B(M+14,I,J,K)= T3R0
+  398             CONTINUE
+  399    CONTINUE
+  417                A77( M,3,K)= A770+F5X0*GEIJKL
+  445                A81( M,3) = A81( M,3)+T( M)*TMP
+                     IF(K.EQ.L)A81( 5,3)=A81( 5,3)+TMP
+                     IF(I.EQ.J) THEN
+                        DO 447 M=6,11
+  447                   A81( M,3) = A81( M,3)+T( M)*GEIJKL
+                     ENDIF
+      ENDIF
+      IF(LK.EQ.1) THEN
+         IF(JTYPE.NE.4) THEN
+            DO 510 J=0,3
+               A31(3,J)= A310+ A310*Y02
+               A32(3,J)= A320+ A320*Y02
+  510       CONTINUE
+            A33( 6)=-AEIJ00*Y1Y+T01
+            A33( 7)= A330-0*Y01+T01
+            A33( 8)= A330- A330*Y01
+            A33(15)= A330+0*Y02
+            A33(16)= A330+ A330*Y02
+         ENDIF
+            A84(12,N)= A84( 7,N)+ A84( 8,N)*Y02
+            A84(13,N)= A84( 9,N)
+         A85(10,2)= A85(10,2)- A85(10,1)+ A850
+         A85(11,2)= A85(11,2)- A85(11,1)+ A850
+         A85(12,2)= A85(12,2)- A85(12,1)+ A850
+         A85(13,2)= A85(13,2)- A85(13,1)
+         Q4(0)= S4( 0)+(S4( 1)+(S4( 2)+(S4( 3)+S4( 4)*Y02)*Y02)*Y02)*Y02
+         Q4(1)= S4( 5)+(S4( 6)+(S4( 7)+ S4( 8)*Y02)*Y02)*Y02
+         Q4(2)= S4( 9)+(S4(10)+ S4(11)*Y02)*Y02
+         Q4(3)= S4(12)+ S4(13)*Y02
+         Q4(4)= S4(14)
+      ENDIF
+      IF(JTYPE.NE.4) THEN
+      ENDIF
+         C42(1,M)= T0*F10-T0*F10
+         C42(2,M)= T0*F20-T0*F20
+      T(1)= T01+(A43( 4)- A43( 5)*Y04)*Y04
+      F0J0L(2)= T(0)*F20-T0*F20+T(2)*F20
+      DO 660 N=1,3
+         T(0)= A64(3,N)- A64(0,N)- A64(1,N)
+         T(1)= A640- A640*Y04
+         C64(1,N)= T0*F10-T0*F10
+         C64(2,N)= T0*F20-T0*F2(1)
+         C65(1,N)= T0*F10-T0*F10
+         C65(2,N)= T0*F20-T0*F2(1)
+         C70(2,N)= T0*F20-T0*F20
+         T(2)=(A71(17,N)-(A71(18,N)- A71(16,N)*Y04)*Y04)*XX1
+         C71(1,N)= T0*F10-T0*F10+T0*F10
+         C71(2,N)= T0*F20-T0*F20+T0*F20
+         T(1)=(A720+ A720- A720-T0)*XX1
+         C72(1,N)= T0*F10-T0*F10+T0*F10
+         C75(1,N)= T(0)*F10-T0*F1(1)+T(2)*F1(2)
+         C75(2,N)= T(0)*F20-T0*F2(1)+T(2)*F2(2)
+         T01 = A76( 6,N)*XX1
+         T(1)=(T01- A760-(A760- A76( 7,N)- A76( 8,N)
+     2                       -(A760+ A76( 3,N))*Y04)*Y04)*XX1
+  660 CONTINUE
+         C73(2,M)= T0*F20+T0*F20+T(2)*F20
+     2       +(A77(23,1,M)+ A77(24,1,M)*Y04)*Y03
+         T(2)=(A77(21,2,M)+(A77(22,2,M)+ A77(23,2,M)*Y04)*Y04)*XX1
+     2       -(A77(24,2,M)-(A77(25,2,M)+ A77(20,2,M)*Y04)*Y04)*Y03
+         T(3)=(A77(21,3,M)+(A77(22,3,M)+(A77(23,3,M)
+     2                                 + A77(24,3,M)*Y04)*Y04)*Y04)*XX1
+         C77(1,M)= T0*F10-T0*F10-T0*F10+T0*F10
+         C77(2,M)= T(0)*F20-T(1)*F20-T(2)*F20+T(3)*F20
+         T(0)=(A780+ A78(24,1,M))*Y3Y+ A780*XX1
+         T(1)=(A78(23,1,M)+(A78(21,1,M)+A78(22,1,M)*Y04)*Y04)*XX1
+     2       +(A78(25,1,M)+ A78(26,1,M)*Y04)*Y3Y- A78(27,1,M)*XX2
+         T(2)=(A78(21,2,M)+(A78(22,2,M)+ A78(28,2,M)*Y04)*Y04)*XX2
+     2       +(A78(23,2,M)-(A78(24,2,M)+ A78(25,2,M)*Y04)*Y04)*XX1
+     3       -(A78(29,2,M)-(A78(30,2,M)+ A78(31,2,M)*Y04)*Y04)*Y41
+         T(3)=(A78(21,3,M)+(A78(22,3,M)+(A78(23,3,M)
+     2                                 + A78(24,3,M)*Y04)*Y04)*Y04)*XX2
+         C78(1,M)= T0*F10-T0*F1(1)-T(2)*F1(2)+T(3)*F1(3)
+         C78(2,M)= T(0)*F2(0)-T(1)*F2(1)-T(2)*F2(2)+T(3)*F2(3)
+         T(0)=-A79(24,1,M)*Y3Y+ A79(30,1,M)*H43
+         T(1)=(A79(21,1,M)-(A79(23,1,M)+ A79(22,1,M)*Y04)*Y04)*XX1
+     2       +(A79(25,1,M)+ A79(26,1,M)*Y04)*Y3Y- A79(29,1,M)*XX2
+         T(2)=(A79(21,2,M)+(A79(22,2,M)- A79(28,2,M)*Y04)*Y04)*XX2
+     2       +(A79(23,2,M)-(A79(24,2,M)+ A79(25,2,M)*Y04)*Y04)*XX1
+     3       -(A79(29,2,M)-(A79(30,2,M)+ A79(31,2,M)*Y04)*Y04)*Y41
+         T(3)=(A79(21,3,M)+(A79(22,3,M)+(A79(23,3,M)
+     2                                 + A79(24,3,M)*Y04)*Y04)*Y04)*XX2
+         C79(1,M)= T(0)*F1(0)-T(1)*F1(1)-T(2)*F1(2)+T(3)*F1(3)
+         C79(2,M)= T(0)*F2(0)-T(1)*F2(1)-T(2)*F2(2)+T(3)*F2(3)
+      T(0)= A80( 2,1)*Y3Y+ A80( 2,2)*H43
+      T(1)=(A81(16,1)+(A81(14,1)+A81(15,1)*Y04)*Y04)*XX1
+     2    +(A81(18,1)+ A81(19,1)*Y04)*Y3Y- A81(20,1)*XX2
+      T(2)=(A82(21,1)+(A82(22,1)+ A82(28,1)*Y04)*Y04)*XX2
+     2    +(A82(23,1)-(A82(24,1)+ A82(25,1)*Y04)*Y04)*XX1
+     3    +(A83(15,2)+(A83(16,2)+ A82(31,1)*Y04)*Y04)*Y41
+     4    -(A83(17,2)-(A83(18,2)- A83(19,2)*Y04)*Y04)*Y3Y
+      T(3)=(A84(10,1)+(A84(11,1)+(A84(12,1)+A84(13,1)*Y04)*Y04)*Y04)*XX2
+     2    +(A85(10,1)+(A85(10,2)+(A85(11,2)+(A85(12,2)
+     3                          + A85(13,2)*Y04)*Y04)*Y04)*Y04)*XX1
+      T(4)=(Q4(0)+(Q4(1)+(Q4(2)+(Q4(3)+Q4(4)*Y04)*Y04)*Y04)*Y04)*XX2
+      FIJKL(1)= T(0)*F1(0)-T(1)*F1(1)-T(2)*F1(2)+T(3)*F1(3)+T(4)*F1(4)
+      FIJKL(2)= T(0)*F2(0)-T(1)*F2(1)-T(2)*F2(2)+T(3)*F2(3)+T(4)*F2(4)
+                  DO 800 ICP=1,2
+                     VE0= VE0+C61(ICP,0)*WP(1)+FI00L(ICP)*WP(2)
+     2                       +F0J0L(ICP)*WP(3)+C77(ICP,0)*WP(4)
+     3                       +C73(ICP,0)*WP(5)+C78(ICP,0)*WP(6)
+     4                       +C79(ICP,0)*WP(7)+FIJKL(ICP)*WP(8)
+                     IF(ICP.EQ.1) THEN
+                        DO 720 M=1,3
+                           VES(  M)= VES(  M)+VEA(  M)
+                           VES(3+M)= VES(3+M)+VEA(3+M)
+                           T01 = VEA(6+M)
+                           T02 = VEA(9+M)
+                           WES(  M)=-T01+(T01+T02)*Y03
+  720                   CONTINUE
+                        FE1= X24*VE0
+                        DO 730 M=1,3
+                           T01 = VEA(  M)+VEA(3+M)
+                           T02 = VEA(6+M)+VEA(9+M)
+                           WES(3+M)=-T01+(T01+T02)*Z02
+  730                   CONTINUE
+                     ENDIF
+  800             CONTINUE
+                     WES(  M)= WES(  M)+P34(M,3)*FE0
+                     WES(3+M)= WES(3+M)-P1(M)*FE1
+                     VES(6+M)= VES(6+M)+WES(3+M)
+                     FCS(3,M)= FCS(3,M)-WES(3+M)*Y03+WES(  M)
+            DO 925 M=1,3
+               T01= VES(  M)
+               T02= VES(3+M)+VES(6+M)
+               T01=-T01+(T01+T02)*Y01+P12(M,3)*TMP
+               FCS(2,M)= FCS(2,M)-T01+VES(6+M)
+  925       CONTINUE
+      END
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 4e2ecc5b2e5..a32f86b8bc7 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1591,6 +1591,14 @@ vect_build_slp_tree (vec_info *vinfo,
       SLP_TREE_SCALAR_STMTS (res) = vNULL;
       SLP_TREE_DEF_TYPE (res) = vect_uninitialized_def;
       res->failed = XNEWVEC (bool, group_size);
+      if (flag_checking)
+       {
+         unsigned i;
+         for (i = 0; i < group_size; ++i)
+           if (!matches[i])
+             break;
+         gcc_assert (i < group_size);
+       }
       memcpy (res->failed, matches, sizeof (bool) * group_size);
     }
   else
@@ -1898,10 +1906,14 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
          chains.quick_push (chain.copy ());
          chain.truncate (0);
        }
-      if (chains.length () == group_size
-         /* We cannot yet use SLP_TREE_CODE to communicate the operation.  */
-         && op_stmt_info)
+      if (chains.length () == group_size)
        {
+         /* We cannot yet use SLP_TREE_CODE to communicate the operation.  */
+         if (!op_stmt_info)
+           {
+             hard_fail = false;
+             goto out;
+           }
          /* Now we have a set of chains with the same length.  */
          /* 1. pre-sort according to def_type and operation.  */
          for (unsigned lane = 0; lane < group_size; ++lane)
-- 
2.26.2

Reply via email to