https://gcc.gnu.org/g:18a85f9107df49f191a713e191150a9e7c902b04

commit r16-5414-g18a85f9107df49f191a713e191150a9e7c902b04
Author: Richard Biener <[email protected]>
Date:   Wed Nov 19 10:33:05 2025 +0100

    tree-optimization/122747 - fix masking of loops with conditional reduction ops
    
    The following amends the earlier fix in r16-5372-gfacb92812a4ec5 to
    also cover the case where we're not doing the merge to a single cycle
    but go via vectorizable_call instead.
    
            PR tree-optimization/122747
            * tree-vect-stmts.cc (vectorizable_call): Handle reduction
            operations that are already conditional.
    
            * gcc.target/i386/vect-epilogues-10.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.target/i386/vect-epilogues-10.c | 20 ++++++++++++++++++++
 gcc/tree-vect-stmts.cc                            | 17 ++++++++++++-----
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/vect-epilogues-10.c b/gcc/testsuite/gcc.target/i386/vect-epilogues-10.c
new file mode 100644
index 000000000000..a187955e6f17
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-epilogues-10.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 --param vect-partial-vector-usage=1 -fdump-tree-vect-optimized" } */
+
+double foo (double *a, long long *mask, int n)
+{  /* Sums a[i] over those i in [0, n) for which mask[i] is nonzero.  */
+  double sum = 0.0;
+  for (int i = 0; i < n; ++i)
+    {
+      double val;
+      if (mask[i])
+        val = a[i];
+      else
+        val = -0.0;  /* Adding -0.0 leaves sum unchanged (IEEE default rounding).  */
+      sum = sum + val;  /* The reduction: val is either a[i] or -0.0.  */
+    }
+  return sum;
+}
+
+/* { dg-final { scan-tree-dump "optimized: loop vectorized using 64 byte vectors" "vect" } } */
+/* { dg-final { scan-tree-dump "optimized: epilogue loop vectorized using masked 64 byte vectors" "vect" } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 0c23a9f23e21..07d476ac44f9 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3688,7 +3688,8 @@ vectorizable_call (vec_info *vinfo,
     }
 
   int reduc_idx = SLP_TREE_REDUC_IDX (slp_node);
-  internal_fn cond_fn = get_conditional_internal_fn (ifn);
+  internal_fn cond_fn = (internal_fn_mask_index (ifn) != -1
+                        ? ifn : get_conditional_internal_fn (ifn));
   internal_fn cond_len_fn = get_len_internal_fn (ifn);
   int len_opno = internal_fn_len_index (cond_len_fn);
   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
@@ -3769,7 +3770,7 @@ vectorizable_call (vec_info *vinfo,
       else if (reduc_idx >= 0)
        gcc_unreachable ();
     }
-  else if (masked_loop_p && reduc_idx >= 0)
+  else if (masked_loop_p && mask_opno == -1 && reduc_idx >= 0)
     {
       ifn = cond_fn;
       vect_nargs += 2;
@@ -3812,8 +3813,10 @@ vectorizable_call (vec_info *vinfo,
          FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
            {
              int varg = 0;
-             if (masked_loop_p && reduc_idx >= 0)
+             /* Add the mask if necessary.  */
+             if (masked_loop_p && mask_opno == -1 && reduc_idx >= 0)
                {
+                 gcc_assert (internal_fn_mask_index (ifn) == varg);
                  unsigned int vec_num = vec_oprnds0.length ();
                  vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks,
                                                      vec_num, vectype_out, i);
@@ -3824,8 +3827,12 @@ vectorizable_call (vec_info *vinfo,
                  vec<tree> vec_oprndsk = vec_defs[k];
                  vargs[varg++] = vec_oprndsk[i];
                }
-             if (masked_loop_p && reduc_idx >= 0)
-               vargs[varg++] = vargs[reduc_idx + 1];
+             /* Add the else value if necessary.  */
+             if (masked_loop_p && mask_opno == -1 && reduc_idx >= 0)
+               {
+                 gcc_assert (internal_fn_else_index (ifn) == varg);
+                 vargs[varg++] = vargs[reduc_idx + 1];
+               }
              if (clz_ctz_arg1)
                vargs[varg++] = clz_ctz_arg1;

Reply via email to