https://gcc.gnu.org/g:18a85f9107df49f191a713e191150a9e7c902b04
commit r16-5414-g18a85f9107df49f191a713e191150a9e7c902b04 Author: Richard Biener <[email protected]> Date: Wed Nov 19 10:33:05 2025 +0100 tree-optimization/122747 - fix masking of loops with conditional reduction ops The following amends the earlier fix in r16-5372-gfacb92812a4ec5 to also cover the case where we are not doing the merge to a single cycle but instead go via vectorizable_call. PR tree-optimization/122747 * tree-vect-stmts.cc (vectorizable_call): Handle reduction operations that are already conditional. * gcc.target/i386/vect-epilogues-10.c: New testcase. Diff: --- gcc/testsuite/gcc.target/i386/vect-epilogues-10.c | 20 ++++++++++++++++++++ gcc/tree-vect-stmts.cc | 17 ++++++++++++----- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/vect-epilogues-10.c b/gcc/testsuite/gcc.target/i386/vect-epilogues-10.c new file mode 100644 index 000000000000..a187955e6f17 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-epilogues-10.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 --param vect-partial-vector-usage=1 -fdump-tree-vect-optimized" } */ + +double foo (double *a, long long *mask, int n) +{ + double sum = 0.0; + for (int i = 0; i < n; ++i) + { + double val; + if (mask[i]) + val = a[i]; + else + val = -0.0; + sum = sum + val; + } + return sum; +} + +/* { dg-final { scan-tree-dump "optimized: loop vectorized using 64 byte vectors" "vect" } } */ +/* { dg-final { scan-tree-dump "optimized: epilogue loop vectorized using masked 64 byte vectors" "vect" } } */ diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 0c23a9f23e21..07d476ac44f9 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -3688,7 +3688,8 @@ vectorizable_call (vec_info *vinfo, } int reduc_idx = SLP_TREE_REDUC_IDX (slp_node); - internal_fn cond_fn = get_conditional_internal_fn (ifn); + internal_fn cond_fn = (internal_fn_mask_index (ifn) != -1 + ? 
ifn : get_conditional_internal_fn (ifn)); internal_fn cond_len_fn = get_len_internal_fn (ifn); int len_opno = internal_fn_len_index (cond_len_fn); vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL); @@ -3769,7 +3770,7 @@ vectorizable_call (vec_info *vinfo, else if (reduc_idx >= 0) gcc_unreachable (); } - else if (masked_loop_p && reduc_idx >= 0) + else if (masked_loop_p && mask_opno == -1 && reduc_idx >= 0) { ifn = cond_fn; vect_nargs += 2; @@ -3812,8 +3813,10 @@ vectorizable_call (vec_info *vinfo, FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0) { int varg = 0; - if (masked_loop_p && reduc_idx >= 0) + /* Add the mask if necessary. */ + if (masked_loop_p && mask_opno == -1 && reduc_idx >= 0) { + gcc_assert (internal_fn_mask_index (ifn) == varg); unsigned int vec_num = vec_oprnds0.length (); vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num, vectype_out, i); @@ -3824,8 +3827,12 @@ vectorizable_call (vec_info *vinfo, vec<tree> vec_oprndsk = vec_defs[k]; vargs[varg++] = vec_oprndsk[i]; } - if (masked_loop_p && reduc_idx >= 0) - vargs[varg++] = vargs[reduc_idx + 1]; + /* Add the else value if necessary. */ + if (masked_loop_p && mask_opno == -1 && reduc_idx >= 0) + { + gcc_assert (internal_fn_else_index (ifn) == varg); + vargs[varg++] = vargs[reduc_idx + 1]; + } if (clz_ctz_arg1) vargs[varg++] = clz_ctz_arg1;
