> > else if (ret_elt_bits > arg_elt_bits)
> > modifier = WIDEN;
> >
> > + if (supportable_convert_operation (code, ret_type, arg_type, &code1))
> > + {
> > + g = gimple_build_assign (lhs, code1, arg);
> > + gsi_replace (gsi, g, false);
> > + return;
> > + }
>
> Given the API change I suggest below it might make sense to have
> supportable_indirect_convert_operation do the above and represent it as
> single-
> step conversion?
>
OK, if you want to supportable_indirect_convert_operation can do something like
supportable_convert_operation, I'll give it a try. This functionality is really
the part that this function can cover. But this would require some changes not
only the API change, because supportable_indirect_convert_operation originally
only supported Float -> Int or Int ->Float.
>
> > + code_helper code2 = ERROR_MARK, code3 = ERROR_MARK;
> > + int multi_step_cvt = 0;
> > + vec<tree> interm_types = vNULL;
> > + if (supportable_indirect_convert_operation (NULL,
> > + code,
> > + ret_type, arg_type,
> > + &code2, &code3,
> > + &multi_step_cvt,
> > + &interm_types, arg))
> > + {
> > + new_rhs = make_ssa_name (interm_types[0]);
> > + g = gimple_build_assign (new_rhs, (tree_code) code3, arg);
> > + gsi_insert_before (gsi, g, GSI_SAME_STMT);
> > + g = gimple_build_assign (lhs, (tree_code) code2, new_rhs);
> > + gsi_replace (gsi, g, false);
> > + return;
> > + }
> > +
> > if (modifier == NONE && (code == FIX_TRUNC_EXPR || code ==
> FLOAT_EXPR))
> > {
> > - if (supportable_convert_operation (code, ret_type, arg_type, &code1))
> > - {
> > - g = gimple_build_assign (lhs, code1, arg);
> > - gsi_replace (gsi, g, false);
> > - return;
> > - }
> > /* Can't use get_compute_type here, as supportable_convert_operation
> > doesn't necessarily use an optab and needs two arguments. */
> > tree vec_compute_type
> > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index
> > 05a169ecb2d..0aa608202ca 100644
> > --- a/gcc/tree-vect-stmts.cc
> > +++ b/gcc/tree-vect-stmts.cc
> > @@ -5175,7 +5175,7 @@ vectorizable_conversion (vec_info *vinfo,
> > tree scalar_dest;
> > tree op0, op1 = NULL_TREE;
> > loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
> > - tree_code tc1, tc2;
> > + tree_code tc1;
> > code_helper code, code1, code2;
> > code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
> > tree new_temp;
> > @@ -5384,92 +5384,17 @@ vectorizable_conversion (vec_info *vinfo,
> > break;
> > }
> >
> > - /* For conversions between float and integer types try whether
> > - we can use intermediate signed integer types to support the
> > - conversion. */
> > - if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
> > - && (code == FLOAT_EXPR ||
> > - (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
> > - {
> > - bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE
> (lhs_mode);
> > - bool float_expr_p = code == FLOAT_EXPR;
> > - unsigned short target_size;
> > - scalar_mode intermediate_mode;
> > - if (demotion)
> > - {
> > - intermediate_mode = lhs_mode;
> > - target_size = GET_MODE_SIZE (rhs_mode);
> > - }
> > - else
> > - {
> > - target_size = GET_MODE_SIZE (lhs_mode);
> > - if (!int_mode_for_size
> > - (GET_MODE_BITSIZE (rhs_mode), 0).exists
> (&intermediate_mode))
> > - goto unsupported;
> > - }
> > - code1 = float_expr_p ? code : NOP_EXPR;
> > - codecvt1 = float_expr_p ? NOP_EXPR : code;
> > - opt_scalar_mode mode_iter;
> > - FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
> > - {
> > - intermediate_mode = mode_iter.require ();
> > -
> > - if (GET_MODE_SIZE (intermediate_mode) > target_size)
> > - break;
> > -
> > - scalar_mode cvt_mode;
> > - if (!int_mode_for_size
> > - (GET_MODE_BITSIZE (intermediate_mode), 0).exists
> (&cvt_mode))
> > - break;
> > -
> > - cvt_type = build_nonstandard_integer_type
> > - (GET_MODE_BITSIZE (cvt_mode), 0);
> > -
> > - /* Check if the intermediate type can hold OP0's range.
> > - When converting from float to integer this is not necessary
> > - because values that do not fit the (smaller) target type are
> > - unspecified anyway. */
> > - if (demotion && float_expr_p)
> > - {
> > - wide_int op_min_value, op_max_value;
> > - if (!vect_get_range_info (op0, &op_min_value,
> &op_max_value))
> > - break;
> > -
> > - if (cvt_type == NULL_TREE
> > - || (wi::min_precision (op_max_value, SIGNED)
> > - > TYPE_PRECISION (cvt_type))
> > - || (wi::min_precision (op_min_value, SIGNED)
> > - > TYPE_PRECISION (cvt_type)))
> > - continue;
> > - }
> > -
> > - cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type,
> > slp_node);
> > - /* This should only happened for SLP as long as loop vectorizer
> > - only supports same-sized vector. */
> > - if (cvt_type == NULL_TREE
> > - || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
> > - || !supportable_convert_operation ((tree_code) code1,
> > - vectype_out,
> > - cvt_type, &tc1)
> > - || !supportable_convert_operation ((tree_code) codecvt1,
> > - cvt_type,
> > - vectype_in, &tc2))
> > - continue;
> > -
> > - found_mode = true;
> > - break;
> > - }
> > + if (supportable_indirect_convert_operation (vinfo,
> > + code,
> > + vectype_out,
> > + vectype_in,
> > + &code1,
> > + &codecvt1,
> > + &multi_step_cvt,
> > + &interm_types,
> > + op0,slp_node))
> > + break;
> >
> > - if (found_mode)
> > - {
> > - multi_step_cvt++;
> > - interm_types.safe_push (cvt_type);
> > - cvt_type = NULL_TREE;
> > - code1 = tc1;
> > - codecvt1 = tc2;
> > - break;
> > - }
> > - }
> > /* FALLTHRU */
> > unsupported:
> > if (dump_enabled_p ())
> > @@ -14626,6 +14551,153 @@ supportable_narrowing_operation
> (code_helper code,
> > return false;
> > }
> >
> > +/* Function supportable_indirect_convert_operation
> > +
> > + Check whether an operation represented by the code CODE is two
> > + convert operations that are supported by the target platform in
> > + vector form (i.e., when operating on arguments of type VECTYPE_IN
> > + producing a result of type VECTYPE_OUT).
> > +
> > + Convert operations we currently support directly are FIX_TRUNC and
> > FLOAT.
> > + This function checks if these operations are supported
> > + by the target platform directly (via vector tree-codes).
> > +
> > + Output:
> > + - CODE1 is the code of a vector operation to be used when
> > + converting the operation in the first step, if available.
> > + - CODE2 is the code of a vector operation to be used when
> > + converting the operation in the second step, if available.
> > + - MULTI_STEP_CVT determines the number of required intermediate steps
> in
> > + case of multi-step conversion (like int->short->char - in that case
> > + MULTI_STEP_CVT will be 1). In the function, it should be 1.
> > + - INTERM_TYPES contains the intermediate type required to perform the
> > + convert operation (short in the above example). */
> > +bool
> > +supportable_indirect_convert_operation (vec_info *vinfo,
> > + code_helper code,
> > + tree vectype_out,
> > + tree vectype_in,
> > + code_helper *code1,
> > + code_helper *code2,
> > + int *multi_step_cvt,
> > + vec<tree> *interm_types,
>
> This API is somewhat awkward, as we're inventing a new one I guess we can do
> better. I think we want
>
> vec<std::pair<tree, tree_code> > *converts,
>
> covering all code1, code2, multi_step_cvt and interm_types with the conversion
> sequence being
>
> converts[0].first tem0 = converts[0].second op0;
> converts[1].first tem1 = converts[1].second tem;
>
That's great, this really makes the function work better.
>
> ... while converts.length () determines the length of the chain, one being a
> direct
> conversion where then converts[0].first is vectype_out. That would allow
> double -> char to go double -> float -> int -> short -> char for example.
>
I'm trying to determine the requirements, do you want this function to support
multiple conversions (the current implementation just does a two-step
conversion, like double -> char, which becomes double -> int -> char). Actually
we should be able to do all conversions in two steps, if we have some suitable
instructions. I can't think of a scenario where multiple conversions are needed
yet. Could you give me some examples? Of course, I could tweak this feature in
advance if it is for future consideration.
>
> > + tree op0,
> > + slp_tree slp_node)
>
> I would like to avoid passing VINFO and SLP_NODE here, see below.
> The same is true for OP0 where the existing use is wrong for SLP already, but
> I
> guess that can stay for now (I opened PR115538 about the wrong-code issue).
>
Thanks, I have removed them.
>
> > +{
> > + bool found_mode = false;
> > + scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_out));
> > + scalar_mode rhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_in));
> > + opt_scalar_mode mode_iter;
> > + tree_code tc1, tc2;
> > +
> > + tree cvt_type = NULL_TREE;
> > + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype_in);
> > +
> > + (*multi_step_cvt) = 0;
> > + /* For conversions between float and integer types try whether
> > + we can use intermediate signed integer types to support the
> > + conversion. */
> > + if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
> > + && (code == FLOAT_EXPR
> > + || (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
> > + {
> > + bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE
> (lhs_mode);
> > + bool float_expr_p = code == FLOAT_EXPR;
> > + unsigned short target_size;
> > + scalar_mode intermediate_mode;
> > + if (demotion)
> > + {
> > + intermediate_mode = lhs_mode;
> > + target_size = GET_MODE_SIZE (rhs_mode);
> > + }
> > + else
> > + {
> > + target_size = GET_MODE_SIZE (lhs_mode);
> > + if (!int_mode_for_size
> > + (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
> > + return false;
> > + }
> > + *code1 = float_expr_p ? code : NOP_EXPR;
> > + *code2 = float_expr_p ? NOP_EXPR : code;
> > + opt_scalar_mode mode_iter;
> > + FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
> > + {
> > + intermediate_mode = mode_iter.require ();
> > +
> > + if (GET_MODE_SIZE (intermediate_mode) > target_size)
> > + break;
> > +
> > + scalar_mode cvt_mode;
> > + if (!int_mode_for_size
> > + (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
> > + break;
> > +
> > + cvt_type = build_nonstandard_integer_type
> > + (GET_MODE_BITSIZE (cvt_mode), 0);
> > +
> > + /* Check if the intermediate type can hold OP0's range.
> > + When converting from float to integer this is not necessary
> > + because values that do not fit the (smaller) target type are
> > + unspecified anyway. */
> > + if (demotion && float_expr_p)
> > + {
> > + wide_int op_min_value, op_max_value;
> > + /* For vector form, it looks like op0 doesn't have RANGE_INFO.
> > + In the future, if it is supported, changes may need to be made
> > + to this part, such as checking the RANGE of each element
> > + in the vector. */
> > + if (!SSA_NAME_RANGE_INFO (op0)
> > + || !vect_get_range_info (op0, &op_min_value,
> &op_max_value))
> > + break;
> > +
> > + if (cvt_type == NULL_TREE
> > + || (wi::min_precision (op_max_value, SIGNED)
> > + > TYPE_PRECISION (cvt_type))
> > + || (wi::min_precision (op_min_value, SIGNED)
> > + > TYPE_PRECISION (cvt_type)))
> > + continue;
> > + }
> > +
> > + if (vinfo != NULL && slp_node != NULL)
> > + cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
> > + else
> > + {
> > + bool uns = TYPE_UNSIGNED (TREE_TYPE (vectype_out))
> > + || TYPE_UNSIGNED (TREE_TYPE (vectype_in));
> > + cvt_type = build_nonstandard_integer_type
> > + (GET_MODE_BITSIZE (cvt_mode), uns);
> > + cvt_type = build_vector_type (cvt_type, nelts);
> > + }
>
> So this would then become
>
> cvt_type = get_related_vectype_for_scalar_type (TYPE_MODE
> (vectype_in), cvt_type, TYPE_VECTOR_SUBPARTS (vectype_in));
>
> > + /* This should only happened for SLP as long as loop vectorizer
> > + only supports same-sized vector. */
> > + if (cvt_type == NULL_TREE
> > + || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nelts)
> > + || !supportable_convert_operation ((tree_code) *code1,
> > + vectype_out,
> > + cvt_type, &tc1)
> > + || !supportable_convert_operation ((tree_code) *code2,
> > + cvt_type,
> > + vectype_in, &tc2))
> > + continue;
> > +
> > + found_mode = true;
> > + break;
> > + }
> > +
> > + if (found_mode)
> > + {
> > + (*multi_step_cvt)++;
> > + interm_types->safe_push (cvt_type);
> > + cvt_type = NULL_TREE;
> > + *code1 = tc1;
> > + *code2 = tc2;
> > + return true;
> > + }
> > + }
> > + interm_types->release ();
>
> Hmm, ownership of interm_types is somewhat unclear here - the caller should
> release it, or is the situation that the caller is confused by stray elements
> in it? In
> that case I'd suggest to instead do interm_types->truncate (0).
>
It's my fault, I just imitate supportable_narrowing/widening_operation, I think
for this function, interm_types->release() is not needed.
BRs,
Lin