Hi,

in PR112406 Tamar found another problem with COND_OP reductions.
I wrongly assumed that the reduction variable will always remain in
operand 1, just as we create the COND_OP in ifcvt.  But of course,
addition being commutative, we are free to swap operand 1 and 2 and
can end up with e.g.

 _ifc__60 = .COND_ADD (_2, _6, MADPictureC1_lsm.10_25, MADPictureC1_lsm.10_25);

which does not pass the asserts I put in place.

This patch removes this restriction and allows the reduction index to be
2 as well.

Bootstrapped and regtested on aarch64 and regtested on riscv.  x86 is
still running.

Regards
 Robin

gcc/ChangeLog:

        PR middle-end/112406

        * tree-vect-loop.cc (vectorize_fold_left_reduction): Allow
        reduction index != 1.
        (vect_transform_reduction): Handle reduction index != 1.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/pr112406-2.c: New test.
---
 gcc/testsuite/gcc.target/aarch64/pr112406-2.c | 20 +++++++++++++++++++
 gcc/tree-vect-loop.cc                         | 15 +++++++++-----
 2 files changed, 30 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr112406-2.c

diff --git a/gcc/testsuite/gcc.target/aarch64/pr112406-2.c 
b/gcc/testsuite/gcc.target/aarch64/pr112406-2.c
new file mode 100644
index 00000000000..bb6e9cf7c70
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr112406-2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-options "-march=armv8-a+sve -Ofast" } */
+
+double MADPictureC1;
+extern int PictureRejected[];
+int PictureMAD_0, MADModelEstimator_n_windowSize_i, 
MADModelEstimator_n_windowSize_oneSampleQ;
+
+void MADModelEstimator_n_windowSize() {
+  int estimateX2 = 0;
+  for (; MADModelEstimator_n_windowSize_i; MADModelEstimator_n_windowSize_i++) 
{
+    if (MADModelEstimator_n_windowSize_oneSampleQ &&
+        !PictureRejected[MADModelEstimator_n_windowSize_i])
+      estimateX2 = 1;
+    if (!PictureRejected[MADModelEstimator_n_windowSize_i])
+      MADPictureC1 += PictureMAD_0;
+  }
+  if (estimateX2)
+    for (;;)
+      ;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 58679e91c0a..044eacddf7e 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -7070,7 +7070,7 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
     op0 = ops[1 - reduc_index];
   else
     {
-      op0 = ops[2];
+      op0 = ops[2 + (1 - reduc_index)];
       opmask = ops[0];
       gcc_assert (!slp_node);
     }
@@ -8455,7 +8455,9 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
       gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB
                  || code == IFN_COND_MUL || code == IFN_COND_AND
                  || code == IFN_COND_IOR || code == IFN_COND_XOR);
-      gcc_assert (op.num_ops == 4 && (op.ops[1] == op.ops[3]));
+      gcc_assert (op.num_ops == 4
+                 && (op.ops[reduc_index]
+                     == op.ops[internal_fn_else_index ((internal_fn) code)]));
     }
 
   bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
@@ -8498,12 +8500,15 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
     {
       /* For a conditional operation pass the truth type as mask
         vectype.  */
-      gcc_assert (single_defuse_cycle && reduc_index == 1);
+      gcc_assert (single_defuse_cycle
+                 && (reduc_index == 1 || reduc_index == 2));
       vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies,
                         op.ops[0], &vec_oprnds0,
                         truth_type_for (vectype_in),
-                        NULL_TREE, &vec_oprnds1, NULL_TREE,
-                        op.ops[2], &vec_oprnds2, NULL_TREE);
+                        reduc_index == 1 ? NULL_TREE : op.ops[1],
+                        &vec_oprnds1, NULL_TREE,
+                        reduc_index == 2 ? NULL_TREE : op.ops[2],
+                        &vec_oprnds2, NULL_TREE);
     }
 
   /* For single def-use cycles get one copy of the vectorized reduction
-- 
2.42.0

Reply via email to