On Tue, May 25, 2021 at 6:24 PM Richard Biener
<richard.guent...@gmail.com> wrote:
>
> On Mon, May 24, 2021 at 11:52 AM Hongtao Liu <crazy...@gmail.com> wrote:
> >
> > Hi:
> >   Details described in PR.
> >   Bootstrapped and regtest on
> > x86_64-linux-gnu{-m32,}/x86_64-linux-gnu{-m32\
> > -march=cascadelake,-march=cascadelake}
> >   Ok for trunk?
>
> +static tree
> +strip_nop_cond_scalar_reduction (bool has_nop, tree op)
> +{
> +  if (!has_nop)
> +    return op;
> +
> +  if (TREE_CODE (op) != SSA_NAME)
> +    return NULL_TREE;
> +
> +  gimple* stmt = SSA_NAME_DEF_STMT (op);
> +  if (!stmt
> +      || gimple_code (stmt) != GIMPLE_ASSIGN
> +      || gimple_has_volatile_ops (stmt)
> +      || gimple_assign_rhs_code (stmt) != NOP_EXPR)
> +    return NULL_TREE;
> +
> +  return gimple_assign_rhs1 (stmt);
>
> this allows arbitrary conversions where the comment suggests you
> only want to allow conversions to the same precision but different sign.
> Sth like
>
>   gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
>   if (!stmt
>       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
>       || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
> (gimple_assign_rhs1 (stmt))))
>     return NULL_TREE;
>
> +      if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
> +         || gimple_code (stmt) != GIMPLE_ASSIGN
> +         || gimple_has_volatile_ops (stmt))
> +       return false;
>
> !is_gimple_assign (stmt) instead of gimple_code (stmt) != GIMPLE_ASSIGN
>
> the gimple_has_volatile_ops check is superfluous given you restrict
> the assign code.
>
> +      /* Check that R_NOP1 is used in nop_stmt or in PHI only.  */
> +      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
> +       {
> +         gimple *use_stmt = USE_STMT (use_p);
> +         if (is_gimple_debug (use_stmt))
> +           continue;
> +         if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
> +           continue;
> +         if (gimple_code (use_stmt) != GIMPLE_PHI)
> +           return false;
>
> can the last check be use_stmt == phi since we should have the
> PHI readily available?
>
> @@ -1735,6 +1822,23 @@ convert_scalar_cond_reduction (gimple *reduc,
> gimple_stmt_iterator *gsi,
>    rhs = fold_build2 (gimple_assign_rhs_code (reduc),
>                      TREE_TYPE (rhs1), op0, tmp);
>
> +  if (has_nop)
> +    {
> +      /* Create assignment nop_rhs = op0 +/- _ifc_.  */
> +      tree nop_rhs = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_nop_");
> +      gimple* new_assign2 = gimple_build_assign (nop_rhs, rhs);
> +      gsi_insert_before (gsi, new_assign2, GSI_SAME_STMT);
> +      /* Rebuild rhs for nop_expr.  */
> +      rhs = fold_build1 (NOP_EXPR,
> +                        TREE_TYPE (gimple_assign_lhs (nop_reduc)),
> +                        nop_rhs);
> +
> +      /* Delete nop_reduc.  */
> +      stmt_it = gsi_for_stmt (nop_reduc);
> +      gsi_remove (&stmt_it, true);
> +      release_defs (nop_reduc);
> +    }
> +
>
> hmm, the whole function could be cleaned up with sth like
>
>      /* Build rhs for unconditional increment/decrement.  */
>      gimple_seq stmts = NULL;
>      rhs = gimple_build (&stmts, gimple_assign_rhs_code (reduc),
>                                     TREE_TYPE (rhs1), op0, tmp);
>      if (has_nop)
>        rhs = gimple_convert (&stmts, TREE_TYPE (gimple_assign_lhs
> (nop_reduc)), rhs);
>      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>
> plus in the caller moving the
>
>       new_stmt = gimple_build_assign (res, rhs);
>       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
>
> to the else branch as well as the folding done on new_stmt (maybe return
> new_stmt instead of rhs from convert_scalar_cond_reduction.
Eventually, we need to assign rhs to res; with an extra move stmt
from rhs to res, the vectorizer fails.
The only difference in 166t.ifcvt between successful vectorization
and failed vectorization is below:
   char * _24;
   char _25;
   unsigned char _ifc__29;
+  unsigned char _30;

   <bb 2> [local count: 118111600]:
   if (n_10(D) != 0)
@@ -70,7 +71,8 @@ char foo2 (char * a, char * c, int n)
   _5 = c_14(D) + _1;
   _6 = *_5;
   _ifc__29 = _3 == _6 ? 1 : 0;
-  cnt_7 = cnt_18 + _ifc__29;
+  _30 = cnt_18 + _ifc__29;
+  cnt_7 = _30;
   i_16 = i_20 + 1;
   if (n_10(D) != i_16)
     goto <bb 9>; [89.00%]
@@ -110,7 +112,7 @@ char foo2 (char * a, char * c, int n)
   goto <bb 12>; [100.00%]

   <bb 6> [local count: 105119324]:
-  # cnt_19 = PHI <cnt_7(3), cnt_27(15)>
+  # cnt_19 = PHI <_30(3), cnt_27(15)>
   _21 = (char) cnt_19;

If we want to eliminate the extra move, gimple_build and
gimple_convert are not suitable since they create a new lhs. Is there
any interface like gimple_build_assign that also accepts a stmt sequence?
>
> Richard.
>
> >   gcc/ChangeLog:
> >
> >         PR tree-optimization/pr98365
> >         * tree-if-conv.c (strip_nop_cond_scalar_reduction): New function.
> >         (is_cond_scalar_reduction): Handle nop_expr in cond scalar 
> > reduction.
> >         (convert_scalar_cond_reduction): Ditto.
> >         (predicate_scalar_phi): Ditto.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         PR tree-optimization/pr98365
> >         * gcc.target/i386/pr98365.c: New test.
> >
> > --
> > BR,
> > Hongtao



-- 
BR,
Hongtao

Reply via email to