Jennifer Schmitz <jschm...@nvidia.com> writes:
> @@ -3622,6 +3631,57 @@ gimple_folder::redirect_pred_x ()
> return redirect_call (instance);
> }
>
> +/* Fold calls with predicate pfalse:
> + _m predication: lhs = op1.
> + _x or _z: lhs = {0, ...}.
> + Implicit predication that reads from memory: lhs = {0, ...}.
> + Implicit predication that writes to memory or prefetches: no-op.
> + Return the new gimple statement on success, else NULL. */
> +gimple *
> +gimple_folder::fold_pfalse ()
> +{
> + if (pred == PRED_none)
> + return nullptr;
> + tree arg0 = gimple_call_arg (call, 0);
> + if (pred == PRED_m)
> + {
> + /* Unary function shapes with _m predication are folded to the
> + inactive vector (arg0), while other function shapes are folded
> + to op1 (arg1). */
> + tree arg1 = gimple_call_arg (call, 1);
> + tree t;
> + if (is_pfalse (arg1))
> + t = arg0;
> + else if (is_pfalse (arg0))
> + t = arg1;
> + else
> + return nullptr;
> + /* In some intrinsics, e.g. svqshlu, lhs and op1 have different types,
> + such that folding to op1 needs a type conversion. */
> + if (TREE_TYPE (t) == TREE_TYPE (lhs))
> + return fold_call_to (t);
> + else
> + {
> + tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), t);
> + return gimple_build_assign (lhs, VIEW_CONVERT_EXPR, rhs);
> + }
I think we should move the VIEW_CONVERT_EXPR into fold_call_to,
in case it's useful elsewhere.  Admittedly that could also mask bugs,
but still, it seems like a nice facility to have.
Using types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (t)) would be
more general than a direct == between the types.
This block would then become:
/* Unary function shapes with _m predication are folded to the
inactive vector (arg0), while other function shapes are folded
to op1 (arg1). */
tree arg1 = gimple_call_arg (call, 1);
if (is_pfalse (arg1))
return fold_call_to (arg0);
if (is_pfalse (arg0))
return fold_call_to (arg1);
return nullptr;
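For reference, the source-level effect of the _m folds would be
something like this (illustrative only, untested, and not part of
the patch):

  #include <arm_sve.h>

  svint8_t f1 (svint8_t a, svint8_t b)
  {
    /* pg is pfalse, so every lane is inactive and takes op1.  */
    return svadd_s8_m (svpfalse_b (), a, b);       /* folds to "a" */
  }

  svint8_t f2 (svint8_t inactive, svint8_t a)
  {
    /* Unary shape: the pfalse predicate is arg1, so the fold
       returns the inactive vector (arg0).  */
    return svabs_s8_m (inactive, svpfalse_b (), a);  /* folds to "inactive" */
  }

  svuint8_t f3 (svint8_t a)
  {
    /* lhs is svuint8_t but op1 is svint8_t: this is the case that
       needs the VIEW_CONVERT_EXPR.  */
    return svqshlu_n_s8_m (svpfalse_b (), a, 1);   /* bitcast of "a" */
  }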
> + }
> + if ((pred == PRED_x || pred == PRED_z) && is_pfalse (arg0))
> + return fold_call_to (build_zero_cst (TREE_TYPE (lhs)));
> + if (pred == PRED_implicit && is_pfalse (arg0))
> + {
> + unsigned int flags = call_properties ();
> + /* Folding to lhs = {0, ...} is not appropriate for intrinsics with
> + AGGREGATE types as lhs. */
> + if ((flags & CP_READ_MEMORY)
> + && !AGGREGATE_TYPE_P (TREE_TYPE (lhs)))
> + return fold_call_to (build_zero_cst (TREE_TYPE (lhs)));
> + if (flags & (CP_WRITE_MEMORY | CP_PREFETCH_MEMORY))
> + return fold_to_stmt_vops (gimple_build_nop ());
> + }
> + return nullptr;
> +}
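The remaining cases have source-level effects along these lines
(again just an illustration, not part of the patch):

  #include <arm_sve.h>

  svint8_t g1 (svint8_t a, svint8_t b)
  {
    return svadd_s8_x (svpfalse_b (), a, b);   /* folds to {0, ...} */
  }

  svint8_t g2 (const int8_t *ptr)
  {
    return svld1_s8 (svpfalse_b (), ptr);      /* folds to {0, ...} */
  }

  void g3 (int8_t *ptr, svint8_t data)
  {
    svst1_s8 (svpfalse_b (), ptr, data);       /* folds away entirely */
  }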
> +
> /* Fold the call to constant VAL. */
> gimple *
> gimple_folder::fold_to_cstu (poly_uint64 val)
> @@ -3724,6 +3784,23 @@ gimple_folder::fold_active_lanes_to (tree x)
> return gimple_build_assign (lhs, VEC_COND_EXPR, pred, x, vec_inactive);
> }
>
> +/* Fold call to assignment statement lhs = t. */
> +gimple *
> +gimple_folder::fold_call_to (tree t)
> +{
> + return fold_to_stmt_vops (gimple_build_assign (lhs, t));
> +}
This would then become something like:
/* Fold the result of the call to T, bitcasting to the right type if
necessary. */
gimple *
gimple_folder::fold_call_to (tree t)
{
if (types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (t)))
return fold_to_stmt_vops (gimple_build_assign (lhs, t));
tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), t);
return fold_to_stmt_vops (gimple_build_assign (lhs, VIEW_CONVERT_EXPR, rhs));
}
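With that version, the svqshlu example above takes the second path,
and the emitted statement would look something like this in the dump
(assuming an svuint8_t lhs and an svint8_t T):

  lhs_1 = VIEW_CONVERT_EXPR<svuint8_t>(t_2);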
OK with those changes, thanks.
Richard