> > >
> > > Hmm, but we're visiting them then?  I wonder how you get along
> > > without doing adjustmens on the uses if you consider
> > >
> > >     _1 = a < b;
> > >     _2 = c != d;
> > >     _3 = _1 | _2;
> > >     if (_3 != 0)
> > >       exit loop;
> > >
> > > thus a combined condition like
> > >
> > >     if (a < b || c != d)
> > >
> > > that we if-converted.  We need to recognize that _1, _2 and _3 have
> > > mask uses and thus possibly adjust them.
> > >
> > > What bad happens if you drop 'analyze_only'?  We're not really
> > > rewriting anything there.
> >
> > You mean drop it only in the above? We then fail to update the type for
> > the gcond.  So in certain circumstances like with
> >
> > int a, c, d;
> > short b;
> >
> > int
> > main ()
> > {
> >   int e[1];
> >   for (; b < 2; b++)
> >     {
> >       a = 0;
> >       if (b == 28378)
> >         a = e[b];
> >       if (!(d || b))
> >         for (; c;)
> >           ;
> >     }
> >   return 0;
> > }
> >
> > Unless we walk the statements regardless of whether they come from inside 
> > the
> loop or not.
> 
> What do you mean by "fail to update the type for the gcond"?  If
> I understood correctly the 'analyze_only' short-cuts some
> checks, it doens't add some?
> 
> But it's hard to follow what's actually done for a gcond ...
> 

Yes so I had realized I had misunderstood what this pattern was doing and once
I had made the first wrong change it snowballed.

This is an updates patch where the only modification made is to 
check_bool_pattern
to also return the type of the overall expression even if we are going to 
handle the
conditional through an optab expansion.  I'm piggybacking on the fact that this 
function
has seen enough of the operands to be able to tell the precision needed when 
vectorizing.

This is needed because in the cases where the condition to the gcond was 
already a bool
The precision would be 1 bit, to find the actual mask since we have to dig 
through the
operands which this function already does.

Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu and no 
issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * tree-vect-patterns.cc (vect_init_pattern_stmt): Support gconds.
        (check_bool_pattern, vect_recog_bool_pattern): Support gconds type
        analysis.
        * tree-vect-stmts.cc (vectorizable_comparison_1): Support stmts without
        lhs.
        (vectorizable_early_exit): New.
        (vect_analyze_stmt, vect_transform_stmt): Use it.
        (vect_is_simple_use, vect_get_vector_types_for_stmt): Support gcond.

--- inline copy of patch ---

diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 
7debe7f0731673cd1bf25cd39d55e23990a73d0e..6bf1c0aba8ce94f70ce4e952efd1c5695b189690
 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -132,6 +132,7 @@ vect_init_pattern_stmt (vec_info *vinfo, gimple 
*pattern_stmt,
   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
     {
       gcc_assert (!vectype
+                 || is_a <gcond *> (pattern_stmt)
                  || (VECTOR_BOOLEAN_TYPE_P (vectype)
                      == vect_use_mask_type_p (orig_stmt_info)));
       STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
@@ -5210,10 +5211,12 @@ vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
    true if bool VAR can and should be optimized that way.  Assume it shouldn't
    in case it's a result of a comparison which can be directly vectorized into
    a vector comparison.  Fills in STMTS with all stmts visited during the
-   walk.  */
+   walk.  if VECTYPE then this value will contain the common type of the
+   operations making up the comparisons.  */
 
 static bool
-check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
+check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts,
+                   tree *vectype)
 {
   tree rhs1;
   enum tree_code rhs_code;
@@ -5234,27 +5237,28 @@ check_bool_pattern (tree var, vec_info *vinfo, 
hash_set<gimple *> &stmts)
   switch (rhs_code)
     {
     case SSA_NAME:
-      if (! check_bool_pattern (rhs1, vinfo, stmts))
+      if (! check_bool_pattern (rhs1, vinfo, stmts, vectype))
        return false;
       break;
 
     CASE_CONVERT:
       if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
        return false;
-      if (! check_bool_pattern (rhs1, vinfo, stmts))
+      if (! check_bool_pattern (rhs1, vinfo, stmts, vectype))
        return false;
       break;
 
     case BIT_NOT_EXPR:
-      if (! check_bool_pattern (rhs1, vinfo, stmts))
+      if (! check_bool_pattern (rhs1, vinfo, stmts, vectype))
        return false;
       break;
 
     case BIT_AND_EXPR:
     case BIT_IOR_EXPR:
     case BIT_XOR_EXPR:
-      if (! check_bool_pattern (rhs1, vinfo, stmts)
-         || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
+      if (! check_bool_pattern (rhs1, vinfo, stmts, vectype)
+         || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts,
+                                  vectype))
        return false;
       break;
 
@@ -5272,6 +5276,8 @@ check_bool_pattern (tree var, vec_info *vinfo, 
hash_set<gimple *> &stmts)
          if (comp_vectype == NULL_TREE)
            return false;
 
+         if (vectype)
+           *vectype = comp_vectype;
          tree mask_type = get_mask_type_for_scalar_type (vinfo,
                                                          TREE_TYPE (rhs1));
          if (mask_type
@@ -5608,13 +5614,28 @@ vect_recog_bool_pattern (vec_info *vinfo,
   enum tree_code rhs_code;
   tree var, lhs, rhs, vectype;
   gimple *pattern_stmt;
-
-  if (!is_gimple_assign (last_stmt))
+  gcond* cond = NULL;
+  if (!is_gimple_assign (last_stmt)
+      && !(cond = dyn_cast <gcond *> (last_stmt)))
     return NULL;
 
-  var = gimple_assign_rhs1 (last_stmt);
-  lhs = gimple_assign_lhs (last_stmt);
-  rhs_code = gimple_assign_rhs_code (last_stmt);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
+  if (is_gimple_assign (last_stmt))
+    {
+      var = gimple_assign_rhs1 (last_stmt);
+      lhs = gimple_assign_lhs (last_stmt);
+      rhs_code = gimple_assign_rhs_code (last_stmt);
+    }
+  else if (loop_vinfo && LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+    {
+      /* If not multiple exits, and loop vectorization don't bother analyzing
+        the gcond as we don't support SLP today.  */
+      lhs = gimple_cond_lhs (last_stmt);
+      var = gimple_cond_lhs (last_stmt);
+      rhs_code = gimple_cond_code (last_stmt);
+    }
+  else
+    return NULL;
 
   if (rhs_code == VIEW_CONVERT_EXPR)
     var = TREE_OPERAND (var, 0);
@@ -5632,7 +5653,7 @@ vect_recog_bool_pattern (vec_info *vinfo,
        return NULL;
       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
 
-      if (check_bool_pattern (var, vinfo, bool_stmts))
+      if (check_bool_pattern (var, vinfo, bool_stmts, NULL))
        {
          rhs = adjust_bool_stmts (vinfo, bool_stmts,
                                   TREE_TYPE (lhs), stmt_vinfo);
@@ -5680,7 +5701,7 @@ vect_recog_bool_pattern (vec_info *vinfo,
 
       return pattern_stmt;
     }
-  else if (rhs_code == COND_EXPR
+  else if ((rhs_code == COND_EXPR || cond)
           && TREE_CODE (var) == SSA_NAME)
     {
       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
@@ -5700,18 +5721,33 @@ vect_recog_bool_pattern (vec_info *vinfo,
       if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
        return NULL;
 
-      if (check_bool_pattern (var, vinfo, bool_stmts))
+      tree comp_type = NULL_TREE;
+      if (check_bool_pattern (var, vinfo, bool_stmts, &comp_type))
        var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
-      else if (integer_type_for_mask (var, vinfo))
+      else if (!cond && integer_type_for_mask (var, vinfo))
+       return NULL;
+      else if (cond && !comp_type)
        return NULL;
 
-      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
-      pattern_stmt 
-       = gimple_build_assign (lhs, COND_EXPR,
-                              build2 (NE_EXPR, boolean_type_node,
-                                      var, build_int_cst (TREE_TYPE (var), 0)),
-                              gimple_assign_rhs2 (last_stmt),
-                              gimple_assign_rhs3 (last_stmt));
+      if (!cond)
+       {
+         lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
+         pattern_stmt
+           = gimple_build_assign (lhs, COND_EXPR,
+                                  build2 (NE_EXPR, boolean_type_node, var,
+                                          build_int_cst (TREE_TYPE (var), 0)),
+                                  gimple_assign_rhs2 (last_stmt),
+                                  gimple_assign_rhs3 (last_stmt));
+       }
+      else
+       {
+         pattern_stmt
+           = gimple_build_cond (NE_EXPR,
+                                var, build_int_cst (TREE_TYPE (var), 0),
+                                gimple_cond_true_label (cond),
+                                gimple_cond_false_label (cond));
+         vectype = truth_type_for (comp_type);
+       }
       *type_out = vectype;
       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
 
@@ -5725,7 +5761,7 @@ vect_recog_bool_pattern (vec_info *vinfo,
       if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
        return NULL;
 
-      if (check_bool_pattern (var, vinfo, bool_stmts))
+      if (check_bool_pattern (var, vinfo, bool_stmts, NULL))
        rhs = adjust_bool_stmts (vinfo, bool_stmts,
                                 TREE_TYPE (vectype), stmt_vinfo);
       else
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 
582c5e678fad802d6e76300fe3c939b9f2978f17..d0878250f6fb9de4d6e6a39d16956ca147be4b80
 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -12489,7 +12489,7 @@ vectorizable_comparison_1 (vec_info *vinfo, tree 
vectype,
   vec<tree> vec_oprnds0 = vNULL;
   vec<tree> vec_oprnds1 = vNULL;
   tree mask_type;
-  tree mask;
+  tree mask = NULL_TREE;
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
     return false;
@@ -12629,8 +12629,9 @@ vectorizable_comparison_1 (vec_info *vinfo, tree 
vectype,
   /* Transform.  */
 
   /* Handle def.  */
-  lhs = gimple_assign_lhs (STMT_VINFO_STMT (stmt_info));
-  mask = vect_create_destination_var (lhs, mask_type);
+  lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info));
+  if (lhs)
+    mask = vect_create_destination_var (lhs, mask_type);
 
   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
                     rhs1, &vec_oprnds0, vectype,
@@ -12644,7 +12645,10 @@ vectorizable_comparison_1 (vec_info *vinfo, tree 
vectype,
       gimple *new_stmt;
       vec_rhs2 = vec_oprnds1[i];
 
-      new_temp = make_ssa_name (mask);
+      if (lhs)
+       new_temp = make_ssa_name (mask);
+      else
+       new_temp = make_temp_ssa_name (mask_type, NULL, "cmp");
       if (bitop1 == NOP_EXPR)
        {
          new_stmt = gimple_build_assign (new_temp, code,
@@ -12723,6 +12727,198 @@ vectorizable_comparison (vec_info *vinfo,
   return true;
 }
 
+/* Check to see if the current early break given in STMT_INFO is valid for
+   vectorization.  */
+
+static bool
+vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
+                        gimple_stmt_iterator *gsi, gimple **vec_stmt,
+                        slp_tree slp_node, stmt_vector_for_cost *cost_vec)
+{
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
+  if (!loop_vinfo
+      || !is_a <gcond *> (STMT_VINFO_STMT (stmt_info)))
+    return false;
+
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def)
+    return false;
+
+  if (!STMT_VINFO_RELEVANT_P (stmt_info))
+    return false;
+
+  auto code = gimple_cond_code (STMT_VINFO_STMT (stmt_info));
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  gcc_assert (vectype);
+
+  tree vectype_op0 = NULL_TREE;
+  slp_tree slp_op0;
+  tree op0;
+  enum vect_def_type dt0;
+  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0, &dt0,
+                          &vectype_op0))
+    {
+      if (dump_enabled_p ())
+         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                          "use not simple.\n");
+       return false;
+    }
+
+  machine_mode mode = TYPE_MODE (vectype);
+  int ncopies;
+
+  if (slp_node)
+    ncopies = 1;
+  else
+    ncopies = vect_get_num_copies (loop_vinfo, vectype);
+
+  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+  bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+
+  /* Analyze only.  */
+  if (!vec_stmt)
+    {
+      if (direct_optab_handler (cbranch_optab, mode) == CODE_FOR_nothing)
+       {
+         if (dump_enabled_p ())
+             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                              "can't vectorize early exit because the "
+                              "target doesn't support flag setting vector "
+                              "comparisons.\n");
+         return false;
+       }
+
+      if (ncopies > 1
+         && direct_optab_handler (ior_optab, mode) == CODE_FOR_nothing)
+       {
+         if (dump_enabled_p ())
+             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                              "can't vectorize early exit because the "
+                              "target does not support boolean vector OR for "
+                              "type %T.\n", vectype);
+         return false;
+       }
+
+      if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
+                                     vec_stmt, slp_node, cost_vec))
+       return false;
+
+      if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
+       {
+         if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
+                                             OPTIMIZE_FOR_SPEED))
+           return false;
+         else
+           vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
+       }
+
+
+      return true;
+    }
+
+  /* Tranform.  */
+
+  tree new_temp = NULL_TREE;
+  gimple *new_stmt = NULL;
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n");
+
+  if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
+                                 vec_stmt, slp_node, cost_vec))
+    gcc_unreachable ();
+
+  gimple *stmt = STMT_VINFO_STMT (stmt_info);
+  basic_block cond_bb = gimple_bb (stmt);
+  gimple_stmt_iterator  cond_gsi = gsi_last_bb (cond_bb);
+
+  auto_vec<tree> stmts;
+
+  tree mask = NULL_TREE;
+  if (masked_loop_p)
+    mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype, 0);
+
+  if (slp_node)
+    stmts.safe_splice (SLP_TREE_VEC_DEFS (slp_node));
+  else
+    {
+      auto vec_stmts = STMT_VINFO_VEC_STMTS (stmt_info);
+      stmts.reserve_exact (vec_stmts.length ());
+      for (auto stmt : vec_stmts)
+       stmts.quick_push (gimple_assign_lhs (stmt));
+    }
+
+  /* Determine if we need to reduce the final value.  */
+  if (stmts.length () > 1)
+    {
+      /* We build the reductions in a way to maintain as much parallelism as
+        possible.  */
+      auto_vec<tree> workset (stmts.length ());
+
+      /* Mask the statements as we queue them up.  */
+      if (masked_loop_p)
+       for (auto stmt : stmts)
+         workset.quick_push (prepare_vec_mask (loop_vinfo, TREE_TYPE (mask),
+                                               mask, stmt, &cond_gsi));
+      else
+       workset.splice (stmts);
+
+      while (workset.length () > 1)
+       {
+         new_temp = make_temp_ssa_name (vectype, NULL, "vexit_reduc");
+         tree arg0 = workset.pop ();
+         tree arg1 = workset.pop ();
+         new_stmt = gimple_build_assign (new_temp, BIT_IOR_EXPR, arg0, arg1);
+         vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt,
+                                      &cond_gsi);
+         workset.quick_insert (0, new_temp);
+       }
+    }
+  else
+    new_temp = stmts[0];
+
+  gcc_assert (new_temp);
+
+  tree cond = new_temp;
+  /* If we have multiple statements after reduction we should check all the
+     lanes and treat it as a full vector.  */
+  if (masked_loop_p)
+    cond = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, cond,
+                            &cond_gsi);
+
+  /* Now build the new conditional.  Pattern gimple_conds get dropped during
+     codegen so we must replace the original insn.  */
+  stmt = STMT_VINFO_STMT (vect_orig_stmt (stmt_info));
+  gcond *cond_stmt = as_a <gcond *>(stmt);
+  /* When vectorizing we assume that if the branch edge is taken that we're
+     exiting the loop.  This is not however always the case as the compiler 
will
+     rewrite conditions to always be a comparison against 0.  To do this it
+     sometimes flips the edges.  This is fine for scalar,  but for vector we
+     then have to flip the test, as we're still assuming that if you take the
+     branch edge that we found the exit condition.  */
+  auto new_code = NE_EXPR;
+  tree cst = build_zero_cst (vectype);
+  if (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
+                            BRANCH_EDGE (gimple_bb (cond_stmt))->dest))
+    {
+      new_code = EQ_EXPR;
+      cst = build_minus_one_cst (vectype);
+    }
+
+  gimple_cond_set_condition (cond_stmt, new_code, cond, cst);
+  update_stmt (stmt);
+
+  if (slp_node)
+    SLP_TREE_VEC_DEFS (slp_node).truncate (0);
+   else
+    STMT_VINFO_VEC_STMTS (stmt_info).truncate (0);
+
+
+  if (!slp_node)
+    *vec_stmt = stmt;
+
+  return true;
+}
+
 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
    can handle all live statements in the node.  Otherwise return true
    if STMT_INFO is not live or if vectorizable_live_operation can handle it.
@@ -12949,7 +13145,9 @@ vect_analyze_stmt (vec_info *vinfo,
          || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
                                  stmt_info, NULL, node)
          || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
-                                  stmt_info, NULL, node, cost_vec));
+                                  stmt_info, NULL, node, cost_vec)
+         || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
+                                     cost_vec));
   else
     {
       if (bb_vinfo)
@@ -12972,7 +13170,10 @@ vect_analyze_stmt (vec_info *vinfo,
                                         NULL, NULL, node, cost_vec)
              || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
                                          cost_vec)
-             || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
+             || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec)
+             || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
+                                         cost_vec));
+
     }
 
   if (node)
@@ -13131,6 +13332,12 @@ vect_transform_stmt (vec_info *vinfo,
       gcc_assert (done);
       break;
 
+    case loop_exit_ctrl_vec_info_type:
+      done = vectorizable_early_exit (vinfo, stmt_info, gsi, &vec_stmt,
+                                     slp_node, NULL);
+      gcc_assert (done);
+      break;
+
     default:
       if (!STMT_VINFO_LIVE_P (stmt_info))
        {
@@ -14321,10 +14528,19 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, 
stmt_vec_info stmt_info,
     }
   else
     {
+      gcond *cond = NULL;
       if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
        scalar_type = TREE_TYPE (DR_REF (dr));
       else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
        scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
+      else if ((cond = dyn_cast <gcond *> (stmt)))
+       {
+         /* We can't convert the scalar type to boolean yet, since booleans 
have a
+            single bit precision and we need the vector boolean to be a
+            representation of the integer mask.  So set the correct integer 
type and
+            convert to boolean vector once we have a vectype.  */
+         scalar_type = TREE_TYPE (gimple_cond_lhs (cond));
+       }
       else
        scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
 
@@ -14339,12 +14555,18 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, 
stmt_vec_info stmt_info,
                             "get vectype for scalar type: %T\n", scalar_type);
        }
       vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
+
       if (!vectype)
        return opt_result::failure_at (stmt,
                                       "not vectorized:"
                                       " unsupported data-type %T\n",
                                       scalar_type);
 
+      /* If we were a gcond, convert the resulting type to a vector boolean 
type now
+        that we have the correct integer mask type.  */
+      if (cond)
+       vectype = truth_type_for (vectype);
+
       if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
     }

Attachment: rb17969 (2).patch
Description: rb17969 (2).patch

Reply via email to