The following fixes the mask argument generation for SIMD clone
calls under either loop masking or when the actual call is not
masked but only a inbranch simd clone is available.  The issue
was that we tried to directly convert the vector mask to the
call argument type but SIMD clone masks require 1 or 0 (which
could be even float) values for mask elements so we have to
resort to a VEC_COND_EXPR to generate them just like we do for
regular passing of the mask.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

        PR tree-optimization/112405
        * tree-vect-stmts.cc (vectorizable_simd_clone_call):
        Properly handle invariant and/or loop mask passing.
---
 gcc/tree-vect-stmts.cc | 62 +++++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index eefb1eec1ef..65883e04ad7 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -4814,36 +4814,42 @@ vectorizable_simd_clone_call (vec_info *vinfo, 
stmt_vec_info stmt_info,
              else
                mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
 
-             if (!useless_type_conversion_p (TREE_TYPE (mask), masktype))
+             gassign *new_stmt;
+             if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
                {
-                 gassign *new_stmt;
-                 if (bestn->simdclone->mask_mode != VOIDmode)
-                   {
-                     /* This means we are dealing with integer mask modes.
-                        First convert to an integer type with the same size as
-                        the current vector type.  */
-                     unsigned HOST_WIDE_INT intermediate_size
-                       = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
-                     tree mid_int_type =
-                       build_nonstandard_integer_type (intermediate_size, 1);
-                     mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
-                     new_stmt
-                       = gimple_build_assign (make_ssa_name (mid_int_type),
-                                              mask);
-                     gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
-                     /* Then zero-extend to the mask mode.  */
-                     mask = fold_build1 (NOP_EXPR, masktype,
-                                         gimple_get_lhs (new_stmt));
-                   }
-                 else
-                   mask = build1 (VIEW_CONVERT_EXPR, masktype, mask);
-
-                 new_stmt = gimple_build_assign (make_ssa_name (masktype),
-                                                 mask);
-                 vect_finish_stmt_generation (vinfo, stmt_info,
-                                              new_stmt, gsi);
-                 mask = gimple_assign_lhs (new_stmt);
+                 /* This means we are dealing with integer mask modes.
+                    First convert to an integer type with the same size as
+                    the current vector type.  */
+                 unsigned HOST_WIDE_INT intermediate_size
+                     = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
+                 tree mid_int_type =
+                     build_nonstandard_integer_type (intermediate_size, 1);
+                 mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
+                 new_stmt
+                     = gimple_build_assign (make_ssa_name (mid_int_type),
+                                            mask);
+                 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+                 /* Then zero-extend to the mask mode.  */
+                 mask = fold_build1 (NOP_EXPR, masktype,
+                                     gimple_get_lhs (new_stmt));
+               }
+             else if (bestn->simdclone->mask_mode == VOIDmode)
+               {
+                 tree one = fold_convert (TREE_TYPE (masktype),
+                                          integer_one_node);
+                 tree zero = fold_convert (TREE_TYPE (masktype),
+                                           integer_zero_node);
+                 mask = build3 (VEC_COND_EXPR, masktype, mask,
+                                build_vector_from_val (masktype, one),
+                                build_vector_from_val (masktype, zero));
                }
+             else
+               gcc_unreachable ();
+
+             new_stmt = gimple_build_assign (make_ssa_name (masktype), mask);
+             vect_finish_stmt_generation (vinfo, stmt_info,
+                                          new_stmt, gsi);
+             mask = gimple_assign_lhs (new_stmt);
              vargs.safe_push (mask);
            }
        }
-- 
2.35.3

Reply via email to