> > else if (ret_elt_bits > arg_elt_bits) > > modifier = WIDEN; > > > > + if (supportable_convert_operation (code, ret_type, arg_type, &code1)) > > + { > > + g = gimple_build_assign (lhs, code1, arg); > > + gsi_replace (gsi, g, false); > > + return; > > + } > > Given the API change I suggest below it might make sense to have > supportable_indirect_convert_operation do the above and represent it as > single- > step conversion? >
OK, if you want to supportable_indirect_convert_operation can do something like supportable_convert_operation, I'll give it a try. This functionality is really the part that this function can cover. But this would require some changes not only the API change, because supportable_indirect_convert_operation originally only supported Float -> Int or Int ->Float. > > > + code_helper code2 = ERROR_MARK, code3 = ERROR_MARK; > > + int multi_step_cvt = 0; > > + vec<tree> interm_types = vNULL; > > + if (supportable_indirect_convert_operation (NULL, > > + code, > > + ret_type, arg_type, > > + &code2, &code3, > > + &multi_step_cvt, > > + &interm_types, arg)) > > + { > > + new_rhs = make_ssa_name (interm_types[0]); > > + g = gimple_build_assign (new_rhs, (tree_code) code3, arg); > > + gsi_insert_before (gsi, g, GSI_SAME_STMT); > > + g = gimple_build_assign (lhs, (tree_code) code2, new_rhs); > > + gsi_replace (gsi, g, false); > > + return; > > + } > > + > > if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == > FLOAT_EXPR)) > > { > > - if (supportable_convert_operation (code, ret_type, arg_type, &code1)) > > - { > > - g = gimple_build_assign (lhs, code1, arg); > > - gsi_replace (gsi, g, false); > > - return; > > - } > > /* Can't use get_compute_type here, as supportable_convert_operation > > doesn't necessarily use an optab and needs two arguments. */ > > tree vec_compute_type > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index > > 05a169ecb2d..0aa608202ca 100644 > > --- a/gcc/tree-vect-stmts.cc > > +++ b/gcc/tree-vect-stmts.cc > > @@ -5175,7 +5175,7 @@ vectorizable_conversion (vec_info *vinfo, > > tree scalar_dest; > > tree op0, op1 = NULL_TREE; > > loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); > > - tree_code tc1, tc2; > > + tree_code tc1; > > code_helper code, code1, code2; > > code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK; > > tree new_temp; > > @@ -5384,92 +5384,17 @@ vectorizable_conversion (vec_info *vinfo, > > break; > > } > > > > - /* For conversions between float and integer types try whether > > - we can use intermediate signed integer types to support the > > - conversion. */ > > - if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode) > > - && (code == FLOAT_EXPR || > > - (code == FIX_TRUNC_EXPR && !flag_trapping_math))) > > - { > > - bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE > (lhs_mode); > > - bool float_expr_p = code == FLOAT_EXPR; > > - unsigned short target_size; > > - scalar_mode intermediate_mode; > > - if (demotion) > > - { > > - intermediate_mode = lhs_mode; > > - target_size = GET_MODE_SIZE (rhs_mode); > > - } > > - else > > - { > > - target_size = GET_MODE_SIZE (lhs_mode); > > - if (!int_mode_for_size > > - (GET_MODE_BITSIZE (rhs_mode), 0).exists > (&intermediate_mode)) > > - goto unsupported; > > - } > > - code1 = float_expr_p ? code : NOP_EXPR; > > - codecvt1 = float_expr_p ? NOP_EXPR : code; > > - opt_scalar_mode mode_iter; > > - FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode) > > - { > > - intermediate_mode = mode_iter.require (); > > - > > - if (GET_MODE_SIZE (intermediate_mode) > target_size) > > - break; > > - > > - scalar_mode cvt_mode; > > - if (!int_mode_for_size > > - (GET_MODE_BITSIZE (intermediate_mode), 0).exists > (&cvt_mode)) > > - break; > > - > > - cvt_type = build_nonstandard_integer_type > > - (GET_MODE_BITSIZE (cvt_mode), 0); > > - > > - /* Check if the intermediate type can hold OP0's range. > > - When converting from float to integer this is not necessary > > - because values that do not fit the (smaller) target type are > > - unspecified anyway. */ > > - if (demotion && float_expr_p) > > - { > > - wide_int op_min_value, op_max_value; > > - if (!vect_get_range_info (op0, &op_min_value, > &op_max_value)) > > - break; > > - > > - if (cvt_type == NULL_TREE > > - || (wi::min_precision (op_max_value, SIGNED) > > - > TYPE_PRECISION (cvt_type)) > > - || (wi::min_precision (op_min_value, SIGNED) > > - > TYPE_PRECISION (cvt_type))) > > - continue; > > - } > > - > > - cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, > > slp_node); > > - /* This should only happened for SLP as long as loop vectorizer > > - only supports same-sized vector. */ > > - if (cvt_type == NULL_TREE > > - || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in) > > - || !supportable_convert_operation ((tree_code) code1, > > - vectype_out, > > - cvt_type, &tc1) > > - || !supportable_convert_operation ((tree_code) codecvt1, > > - cvt_type, > > - vectype_in, &tc2)) > > - continue; > > - > > - found_mode = true; > > - break; > > - } > > + if (supportable_indirect_convert_operation (vinfo, > > + code, > > + vectype_out, > > + vectype_in, > > + &code1, > > + &codecvt1, > > + &multi_step_cvt, > > + &interm_types, > > + op0,slp_node)) > > + break; > > > > - if (found_mode) > > - { > > - multi_step_cvt++; > > - interm_types.safe_push (cvt_type); > > - cvt_type = NULL_TREE; > > - code1 = tc1; > > - codecvt1 = tc2; > > - break; > > - } > > - } > > /* FALLTHRU */ > > unsupported: > > if (dump_enabled_p ()) > > @@ -14626,6 +14551,153 @@ supportable_narrowing_operation > (code_helper code, > > return false; > > } > > > > +/* Function supportable_indirect_convert_operation > > + > > + Check whether an operation represented by the code CODE is two > > + convert operations that are supported by the target platform in > > + vector form (i.e., when operating on arguments of type VECTYPE_IN > > + producing a result of type VECTYPE_OUT). > > + > > + Convert operations we currently support directly are FIX_TRUNC and > > FLOAT. > > + This function checks if these operations are supported > > + by the target platform directly (via vector tree-codes). > > + > > + Output: > > + - CODE1 is the code of a vector operation to be used when > > + converting the operation in the first step, if available. > > + - CODE2 is the code of a vector operation to be used when > > + converting the operation in the second step, if available. > > + - MULTI_STEP_CVT determines the number of required intermediate steps > in > > + case of multi-step conversion (like int->short->char - in that case > > + MULTI_STEP_CVT will be 1). In the function, it should be 1. > > + - INTERM_TYPES contains the intermediate type required to perform the > > + convert operation (short in the above example). */ > > +bool > > +supportable_indirect_convert_operation (vec_info *vinfo, > > + code_helper code, > > + tree vectype_out, > > + tree vectype_in, > > + code_helper *code1, > > + code_helper *code2, > > + int *multi_step_cvt, > > + vec<tree> *interm_types, > > This API is somewhat awkward, as we're inventing a new one I guess we can do > better. I think we want > > vec<std::pair<tree, tree_code> > *converts, > > covering all code1, code2, multi_step_cvt and interm_types with the conversion > sequence being > > converts[0].first tem0 = converts[0].second op0; > converts[1].first tem1 = converts[1].second tem; > That's great, this really makes the function work better. > > ... while converts.length () determines the length of the chain, one being a > direct > conversion where then converts[0].first is vectype_out. That would allow > double -> char to go double -> float -> int -> short -> char for example. > I'm trying to determine the requirements, do you want this function to support multiple conversions (the current implementation just does a two-step conversion, like double -> char, which becomes double -> int -> char). Actually we should be able to do all conversions in two steps, if we have some suitable instructions. I can't think of a scenario where multiple conversions are needed yet. Could you give me some examples? Of course, I could tweak this feature in advance if it is for future consideration. > > > + tree op0, > > + slp_tree slp_node) > > I would like to avoid passing VINFO and SLP_NODE here, see below. > The same is true for OP0 where the existing use is wrong for SLP already, but > I > guess that can stay for now (I opened PR115538 about the wrong-code issue). > Thanks, I have removed them. > > > +{ > > + bool found_mode = false; > > + scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_out)); > > + scalar_mode rhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_in)); > > + opt_scalar_mode mode_iter; > > + tree_code tc1, tc2; > > + > > + tree cvt_type = NULL_TREE; > > + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype_in); > > + > > + (*multi_step_cvt) = 0; > > + /* For conversions between float and integer types try whether > > + we can use intermediate signed integer types to support the > > + conversion. */ > > + if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode) > > + && (code == FLOAT_EXPR > > + || (code == FIX_TRUNC_EXPR && !flag_trapping_math))) > > + { > > + bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE > (lhs_mode); > > + bool float_expr_p = code == FLOAT_EXPR; > > + unsigned short target_size; > > + scalar_mode intermediate_mode; > > + if (demotion) > > + { > > + intermediate_mode = lhs_mode; > > + target_size = GET_MODE_SIZE (rhs_mode); > > + } > > + else > > + { > > + target_size = GET_MODE_SIZE (lhs_mode); > > + if (!int_mode_for_size > > + (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode)) > > + return false; > > + } > > + *code1 = float_expr_p ? code : NOP_EXPR; > > + *code2 = float_expr_p ? NOP_EXPR : code; > > + opt_scalar_mode mode_iter; > > + FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode) > > + { > > + intermediate_mode = mode_iter.require (); > > + > > + if (GET_MODE_SIZE (intermediate_mode) > target_size) > > + break; > > + > > + scalar_mode cvt_mode; > > + if (!int_mode_for_size > > + (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode)) > > + break; > > + > > + cvt_type = build_nonstandard_integer_type > > + (GET_MODE_BITSIZE (cvt_mode), 0); > > + > > + /* Check if the intermediate type can hold OP0's range. > > + When converting from float to integer this is not necessary > > + because values that do not fit the (smaller) target type are > > + unspecified anyway. */ > > + if (demotion && float_expr_p) > > + { > > + wide_int op_min_value, op_max_value; > > + /* For vector form, it looks like op0 doesn't have RANGE_INFO. > > + In the future, if it is supported, changes may need to be made > > + to this part, such as checking the RANGE of each element > > + in the vector. */ > > + if (!SSA_NAME_RANGE_INFO (op0) > > + || !vect_get_range_info (op0, &op_min_value, > &op_max_value)) > > + break; > > + > > + if (cvt_type == NULL_TREE > > + || (wi::min_precision (op_max_value, SIGNED) > > + > TYPE_PRECISION (cvt_type)) > > + || (wi::min_precision (op_min_value, SIGNED) > > + > TYPE_PRECISION (cvt_type))) > > + continue; > > + } > > + > > + if (vinfo != NULL && slp_node != NULL) > > + cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node); > > + else > > + { > > + bool uns = TYPE_UNSIGNED (TREE_TYPE (vectype_out)) > > + || TYPE_UNSIGNED (TREE_TYPE (vectype_in)); > > + cvt_type = build_nonstandard_integer_type > > + (GET_MODE_BITSIZE (cvt_mode), uns); > > + cvt_type = build_vector_type (cvt_type, nelts); > > + } > > So this would then become > > cvt_type = get_related_vectype_for_scalar_type (TYPE_MODE > (vectype_in), cvt_type, TYPE_VECTOR_SUBPARTS (vectype_in)); > > > + /* This should only happened for SLP as long as loop vectorizer > > + only supports same-sized vector. */ > > + if (cvt_type == NULL_TREE > > + || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nelts) > > + || !supportable_convert_operation ((tree_code) *code1, > > + vectype_out, > > + cvt_type, &tc1) > > + || !supportable_convert_operation ((tree_code) *code2, > > + cvt_type, > > + vectype_in, &tc2)) > > + continue; > > + > > + found_mode = true; > > + break; > > + } > > + > > + if (found_mode) > > + { > > + (*multi_step_cvt)++; > > + interm_types->safe_push (cvt_type); > > + cvt_type = NULL_TREE; > > + *code1 = tc1; > > + *code2 = tc2; > > + return true; > > + } > > + } > > + interm_types->release (); > > Hmm, ownership of interm_types is somewhat unclear here - the caller should > release it, or is the situation that the caller is confused by stray elements > in it? In > that case I'd suggest to instead do interm_types->truncate (0). > It's my fault, I just imitate supportable_narrowing/widening_operation, I think for this function, interm_types->release() is not needed. BRs, Lin