Jennifer Schmitz <jschm...@nvidia.com> writes:
> @@ -3622,6 +3631,57 @@ gimple_folder::redirect_pred_x ()
>    return redirect_call (instance);
>  }
>  
> +/* Fold calls with predicate pfalse:
> +   _m predication: lhs = op1.
> +   _x or _z: lhs = {0, ...}.
> +   Implicit predication that reads from memory: lhs = {0, ...}.
> +   Implicit predication that writes to memory or prefetches: no-op.
> +   Return the new gimple statement on success, else NULL.  */
> +gimple *
> +gimple_folder::fold_pfalse ()
> +{
> +  if (pred == PRED_none)
> +    return nullptr;
> +  tree arg0 = gimple_call_arg (call, 0);
> +  if (pred == PRED_m)
> +    {
> +      /* Unary function shapes with _m predication are folded to the
> +         inactive vector (arg0), while other function shapes are folded
> +         to op1 (arg1).  */
> +      tree arg1 = gimple_call_arg (call, 1);
> +      tree t;
> +      if (is_pfalse (arg1))
> +        t = arg0;
> +      else if (is_pfalse (arg0))
> +        t = arg1;
> +      else
> +        return nullptr;
> +      /* In some intrinsics, e.g. svqshlu, lhs and op1 have different types,
> +         such that folding to op1 needs a type conversion.  */
> +      if (TREE_TYPE (t) == TREE_TYPE (lhs))
> +        return fold_call_to (t);
> +      else
> +        {
> +          tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), t);
> +          return gimple_build_assign (lhs, VIEW_CONVERT_EXPR, rhs);
> +        }

I think we should move the VIEW_CONVERT_EXPR into fold_call_to,
in case it's useful elsewhere.  Admittedly that could also mask bugs,
but still, it seems like a nice facility to have.

Using types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (t)) would be
more general than direct == between types.

This block would then become:

      /* Unary function shapes with _m predication are folded to the
         inactive vector (arg0), while other function shapes are folded
         to op1 (arg1).  */
      tree arg1 = gimple_call_arg (call, 1);
      if (is_pfalse (arg1))
        return fold_call_to (arg0);
      if (is_pfalse (arg0))
        return fold_call_to (arg1);
      return nullptr;
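
FWIW, at the source level the _m fold then amounts to something like
(just a sketch, with svadd_s32_m standing in for any binary _m
intrinsic; a and b are svint32_t):

  svint32_t res = svadd_s32_m (svpfalse_b (), a, b);
  /* All lanes are inactive, so res takes the inactive vector: res = a.  */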

> +    }
> +  if ((pred == PRED_x || pred == PRED_z) && is_pfalse (arg0))
> +    return fold_call_to (build_zero_cst (TREE_TYPE (lhs)));
> +  if (pred == PRED_implicit && is_pfalse (arg0))
> +    {
> +      unsigned int flags = call_properties ();
> +      /* Folding to lhs = {0, ...} is not appropriate for intrinsics with
> +         AGGREGATE types as lhs.  */
> +      if ((flags & CP_READ_MEMORY)
> +          && !AGGREGATE_TYPE_P (TREE_TYPE (lhs)))
> +        return fold_call_to (build_zero_cst (TREE_TYPE (lhs)));
> +      if (flags & (CP_WRITE_MEMORY | CP_PREFETCH_MEMORY))
> +        return fold_to_stmt_vops (gimple_build_nop ());
> +    }
> +  return nullptr;
> +}
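
Likewise for the other cases, e.g. (again just a sketch, with
svadd_s32_x and svst1_s32 as representative intrinsics):

  svint32_t res = svadd_s32_x (svpfalse_b (), a, b);  /* res = {0, ...} */
  svst1_s32 (svpfalse_b (), base, data);              /* becomes a no-op */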
> +
>  /* Fold the call to constant VAL.  */
>  gimple *
>  gimple_folder::fold_to_cstu (poly_uint64 val)
> @@ -3724,6 +3784,23 @@ gimple_folder::fold_active_lanes_to (tree x)
>    return gimple_build_assign (lhs, VEC_COND_EXPR, pred, x, vec_inactive);
>  }
>  
> +/* Fold call to assignment statement lhs = t.  */
> +gimple *
> +gimple_folder::fold_call_to (tree t)
> +{
> +  return fold_to_stmt_vops (gimple_build_assign (lhs, t));
> +}

This would then become something like:

/* Fold the result of the call to T, bitcasting to the right type if
   necessary.  */
gimple *
gimple_folder::fold_call_to (tree t)
{
  if (types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (t)))
    return fold_to_stmt_vops (gimple_build_assign (lhs, t));

  tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), t);
  return fold_to_stmt_vops (gimple_build_assign (lhs, VIEW_CONVERT_EXPR, rhs));
}
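
To make the svqshlu example from the comment concrete (a sketch, if
I'm reading the ACLE signature right): op1 there is an svint32_t while
the result is an svuint32_t, so folding the all-false _m call to op1
needs the bitcast:

  svuint32_t res = svqshlu_n_s32_m (svpfalse_b (), op1, 1);
  /* Folds to res = VIEW_CONVERT_EXPR<svuint32_t>(op1).  */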

OK with those changes, thanks.

Richard
