> -----Original Message----- > From: Richard Biener <rguent...@suse.de> > Sent: Monday, June 24, 2024 1:34 PM > To: Hu, Lin1 <lin1...@intel.com> > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao....@intel.com>; > ubiz...@gmail.com > Subject: RE: [PATCH 1/3 v3] vect: generate suitable convert insn for int -> > int, float > -> float and int <-> float. > > On Thu, 20 Jun 2024, Hu, Lin1 wrote: > > > > > else if (ret_elt_bits > arg_elt_bits) > > > > modifier = WIDEN; > > > > > > > > + if (supportable_convert_operation (code, ret_type, arg_type, &code1)) > > > > + { > > > > + g = gimple_build_assign (lhs, code1, arg); > > > > + gsi_replace (gsi, g, false); > > > > + return; > > > > + } > > > > > > Given the API change I suggest below it might make sense to have > > > supportable_indirect_convert_operation do the above and represent it as > single- > > > step conversion? > > > > > > > OK, if you want to supportable_indirect_convert_operation can do > > something like supportable_convert_operation, I'll give it a try. This > > functionality is really the part that this function can cover. But this > > would require some changes not only the API change, because > > supportable_indirect_convert_operation originally only supported Float > > -> Int or Int ->Float. > > I think I'd like to see a single API to handle direct and > (multi-)indirect-level converts that operate on vectors with all > the same number of lanes. > > > > > > > > + code_helper code2 = ERROR_MARK, code3 = ERROR_MARK; > > > > + int multi_step_cvt = 0; > > > > + vec<tree> interm_types = vNULL; > > > > + if (supportable_indirect_convert_operation (NULL, > > > > + code, > > > > + ret_type, arg_type, > > > > + &code2, &code3, > > > > + &multi_step_cvt, > > > > + &interm_types, arg)) > > > > + { > > > > + new_rhs = make_ssa_name (interm_types[0]); > > > > + g = gimple_build_assign (new_rhs, (tree_code) code3, arg); > > > > + gsi_insert_before (gsi, g, GSI_SAME_STMT); > > > > + g = gimple_build_assign (lhs, (tree_code) code2, new_rhs); > > > > + gsi_replace (gsi, g, false); > > > > + return; > > > > + } > > > > + > > > > if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == > > > FLOAT_EXPR)) > > > > { > > > > - if (supportable_convert_operation (code, ret_type, arg_type, > > > > &code1)) > > > > - { > > > > - g = gimple_build_assign (lhs, code1, arg); > > > > - gsi_replace (gsi, g, false); > > > > - return; > > > > - } > > > > /* Can't use get_compute_type here, as > > > > supportable_convert_operation > > > > doesn't necessarily use an optab and needs two arguments. */ > > > > tree vec_compute_type > > > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index > > > > 05a169ecb2d..0aa608202ca 100644 > > > > --- a/gcc/tree-vect-stmts.cc > > > > +++ b/gcc/tree-vect-stmts.cc > > > > @@ -5175,7 +5175,7 @@ vectorizable_conversion (vec_info *vinfo, > > > > tree scalar_dest; > > > > tree op0, op1 = NULL_TREE; > > > > loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); > > > > - tree_code tc1, tc2; > > > > + tree_code tc1; > > > > code_helper code, code1, code2; > > > > code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK; > > > > tree new_temp; > > > > @@ -5384,92 +5384,17 @@ vectorizable_conversion (vec_info *vinfo, > > > > break; > > > > } > > > > > > > > - /* For conversions between float and integer types try whether > > > > - we can use intermediate signed integer types to support the > > > > - conversion. */ > > > > - if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode) > > > > - && (code == FLOAT_EXPR || > > > > - (code == FIX_TRUNC_EXPR && !flag_trapping_math))) > > > > - { > > > > - bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE > > > (lhs_mode); > > > > - bool float_expr_p = code == FLOAT_EXPR; > > > > - unsigned short target_size; > > > > - scalar_mode intermediate_mode; > > > > - if (demotion) > > > > - { > > > > - intermediate_mode = lhs_mode; > > > > - target_size = GET_MODE_SIZE (rhs_mode); > > > > - } > > > > - else > > > > - { > > > > - target_size = GET_MODE_SIZE (lhs_mode); > > > > - if (!int_mode_for_size > > > > - (GET_MODE_BITSIZE (rhs_mode), 0).exists > > > (&intermediate_mode)) > > > > - goto unsupported; > > > > - } > > > > - code1 = float_expr_p ? code : NOP_EXPR; > > > > - codecvt1 = float_expr_p ? NOP_EXPR : code; > > > > - opt_scalar_mode mode_iter; > > > > - FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode) > > > > - { > > > > - intermediate_mode = mode_iter.require (); > > > > - > > > > - if (GET_MODE_SIZE (intermediate_mode) > target_size) > > > > - break; > > > > - > > > > - scalar_mode cvt_mode; > > > > - if (!int_mode_for_size > > > > - (GET_MODE_BITSIZE (intermediate_mode), 0).exists > > > (&cvt_mode)) > > > > - break; > > > > - > > > > - cvt_type = build_nonstandard_integer_type > > > > - (GET_MODE_BITSIZE (cvt_mode), 0); > > > > - > > > > - /* Check if the intermediate type can hold OP0's range. > > > > - When converting from float to integer this is not > > > > necessary > > > > - because values that do not fit the (smaller) target > > > > type are > > > > - unspecified anyway. */ > > > > - if (demotion && float_expr_p) > > > > - { > > > > - wide_int op_min_value, op_max_value; > > > > - if (!vect_get_range_info (op0, &op_min_value, > > > &op_max_value)) > > > > - break; > > > > - > > > > - if (cvt_type == NULL_TREE > > > > - || (wi::min_precision (op_max_value, SIGNED) > > > > - > TYPE_PRECISION (cvt_type)) > > > > - || (wi::min_precision (op_min_value, SIGNED) > > > > - > TYPE_PRECISION (cvt_type))) > > > > - continue; > > > > - } > > > > - > > > > - cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, > > > > slp_node); > > > > - /* This should only happened for SLP as long as loop > > > > vectorizer > > > > - only supports same-sized vector. */ > > > > - if (cvt_type == NULL_TREE > > > > - || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), > > > > nunits_in) > > > > - || !supportable_convert_operation ((tree_code) code1, > > > > - vectype_out, > > > > - cvt_type, &tc1) > > > > - || !supportable_convert_operation ((tree_code) > > > > codecvt1, > > > > - cvt_type, > > > > - vectype_in, &tc2)) > > > > - continue; > > > > - > > > > - found_mode = true; > > > > - break; > > > > - } > > > > + if (supportable_indirect_convert_operation (vinfo, > > > > + code, > > > > + vectype_out, > > > > + vectype_in, > > > > + &code1, > > > > + &codecvt1, > > > > + &multi_step_cvt, > > > > + &interm_types, > > > > + op0,slp_node)) > > > > + break; > > > > > > > > - if (found_mode) > > > > - { > > > > - multi_step_cvt++; > > > > - interm_types.safe_push (cvt_type); > > > > - cvt_type = NULL_TREE; > > > > - code1 = tc1; > > > > - codecvt1 = tc2; > > > > - break; > > > > - } > > > > - } > > > > /* FALLTHRU */ > > > > unsupported: > > > > if (dump_enabled_p ()) > > > > @@ -14626,6 +14551,153 @@ supportable_narrowing_operation > > > (code_helper code, > > > > return false; > > > > } > > > > > > > > +/* Function supportable_indirect_convert_operation > > > > + > > > > + Check whether an operation represented by the code CODE is two > > > > + convert operations that are supported by the target platform in > > > > + vector form (i.e., when operating on arguments of type VECTYPE_IN > > > > + producing a result of type VECTYPE_OUT). > > > > + > > > > + Convert operations we currently support directly are FIX_TRUNC and > FLOAT. > > > > + This function checks if these operations are supported > > > > + by the target platform directly (via vector tree-codes). > > > > + > > > > + Output: > > > > + - CODE1 is the code of a vector operation to be used when > > > > + converting the operation in the first step, if available. > > > > + - CODE2 is the code of a vector operation to be used when > > > > + converting the operation in the second step, if available. > > > > + - MULTI_STEP_CVT determines the number of required intermediate > > > > steps > > > in > > > > + case of multi-step conversion (like int->short->char - in that case > > > > + MULTI_STEP_CVT will be 1). In the function, it should be 1. > > > > + - INTERM_TYPES contains the intermediate type required to perform > > > > the > > > > + convert operation (short in the above example). */ > > > > +bool > > > > +supportable_indirect_convert_operation (vec_info *vinfo, > > > > + code_helper code, > > > > + tree vectype_out, > > > > + tree vectype_in, > > > > + code_helper *code1, > > > > + code_helper *code2, > > > > + int *multi_step_cvt, > > > > + vec<tree> *interm_types, > > > > > > This API is somewhat awkward, as we're inventing a new one I guess we can > > > do > > > better. I think we want > > > > > > vec<std::pair<tree, tree_code> > *converts, > > > > > > covering all code1, code2, multi_step_cvt and interm_types with the > conversion > > > sequence being > > > > > > converts[0].first tem0 = converts[0].second op0; > > > converts[1].first tem1 = converts[1].second tem; > > > > > > > That's great, this really makes the function work better. > > > > > > > > ... while converts.length () determines the length of the chain, one > > > being a > direct > > > conversion where then converts[0].first is vectype_out. That would allow > > > double -> char to go double -> float -> int -> short -> char for example. > > > > > > > I'm trying to determine the requirements, do you want this function to > > support multiple conversions (the current implementation just does a > > two-step conversion, like double -> char, which becomes double -> int -> > > char). Actually we should be able to do all conversions in two steps, if > > we have some suitable instructions. I can't think of a scenario where > > multiple conversions are needed yet. Could you give me some examples? Of > > course, I could tweak this feature in advance if it is for future > > consideration. > > I think the API should support multi-level, not only two levels. The > implementation doesn't need to cover that case unless we run into > such a requirement. Usually vector ISAs allow 2x integer > widening/shortening but not 4x, so a VnDImode -> VnQImode conversion > would need to go via VnSImode and VnHImode (of course some targets > might "help" the vectorizer by providing a VnDImode -> VnQImode > pattern that does the intermediate conversions behind the vectorizers > back). But yes, the original motivation for the vectorizer code > was that float<->int conversions are limited. >
I have a similar patch in this area but instead looking at unsigned int <-> double conversion. I would want to avoid complicating this area too much so it would be good if the API doesn't care about sign either and allows the target to choose the operation mode? My current patch has a backend target hook that asks if the current widening is Preferred as multilevel or single level. For single level I just generate VEC_PERM_EXPRs with a zero register. Just wanted to bring it up in case we can have a coherent story around this conversions. Thanks, Tamar > Thanks, > Richard. > > > > > > > > > > + tree op0, > > > > + slp_tree slp_node) > > > > > > I would like to avoid passing VINFO and SLP_NODE here, see below. > > > The same is true for OP0 where the existing use is wrong for SLP already, > > > but I > > > guess that can stay for now (I opened PR115538 about the wrong-code > > > issue). > > > > > > > Thanks, I have removed them. > > > > > > > > > +{ > > > > + bool found_mode = false; > > > > + scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_out)); > > > > + scalar_mode rhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_in)); > > > > + opt_scalar_mode mode_iter; > > > > + tree_code tc1, tc2; > > > > + > > > > + tree cvt_type = NULL_TREE; > > > > + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype_in); > > > > + > > > > + (*multi_step_cvt) = 0; > > > > + /* For conversions between float and integer types try whether > > > > + we can use intermediate signed integer types to support the > > > > + conversion. */ > > > > + if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode) > > > > + && (code == FLOAT_EXPR > > > > + || (code == FIX_TRUNC_EXPR && !flag_trapping_math))) > > > > + { > > > > + bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE > > > (lhs_mode); > > > > + bool float_expr_p = code == FLOAT_EXPR; > > > > + unsigned short target_size; > > > > + scalar_mode intermediate_mode; > > > > + if (demotion) > > > > + { > > > > + intermediate_mode = lhs_mode; > > > > + target_size = GET_MODE_SIZE (rhs_mode); > > > > + } > > > > + else > > > > + { > > > > + target_size = GET_MODE_SIZE (lhs_mode); > > > > + if (!int_mode_for_size > > > > + (GET_MODE_BITSIZE (rhs_mode), 0).exists > > > > (&intermediate_mode)) > > > > + return false; > > > > + } > > > > + *code1 = float_expr_p ? code : NOP_EXPR; > > > > + *code2 = float_expr_p ? NOP_EXPR : code; > > > > + opt_scalar_mode mode_iter; > > > > + FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode) > > > > + { > > > > + intermediate_mode = mode_iter.require (); > > > > + > > > > + if (GET_MODE_SIZE (intermediate_mode) > target_size) > > > > + break; > > > > + > > > > + scalar_mode cvt_mode; > > > > + if (!int_mode_for_size > > > > + (GET_MODE_BITSIZE (intermediate_mode), 0).exists > > > > (&cvt_mode)) > > > > + break; > > > > + > > > > + cvt_type = build_nonstandard_integer_type > > > > + (GET_MODE_BITSIZE (cvt_mode), 0); > > > > + > > > > + /* Check if the intermediate type can hold OP0's range. > > > > + When converting from float to integer this is not necessary > > > > + because values that do not fit the (smaller) target type > > > > are > > > > + unspecified anyway. */ > > > > + if (demotion && float_expr_p) > > > > + { > > > > + wide_int op_min_value, op_max_value; > > > > + /* For vector form, it looks like op0 doesn't have > > > > RANGE_INFO. > > > > + In the future, if it is supported, changes may need to > > > > be made > > > > + to this part, such as checking the RANGE of each > > > > element > > > > + in the vector. */ > > > > + if (!SSA_NAME_RANGE_INFO (op0) > > > > + || !vect_get_range_info (op0, &op_min_value, > > > &op_max_value)) > > > > + break; > > > > + > > > > + if (cvt_type == NULL_TREE > > > > + || (wi::min_precision (op_max_value, SIGNED) > > > > + > TYPE_PRECISION (cvt_type)) > > > > + || (wi::min_precision (op_min_value, SIGNED) > > > > + > TYPE_PRECISION (cvt_type))) > > > > + continue; > > > > + } > > > > + > > > > + if (vinfo != NULL && slp_node != NULL) > > > > + cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, > > > > slp_node); > > > > + else > > > > + { > > > > + bool uns = TYPE_UNSIGNED (TREE_TYPE (vectype_out)) > > > > + || TYPE_UNSIGNED (TREE_TYPE (vectype_in)); > > > > + cvt_type = build_nonstandard_integer_type > > > > + (GET_MODE_BITSIZE (cvt_mode), uns); > > > > + cvt_type = build_vector_type (cvt_type, nelts); > > > > + } > > > > > > So this would then become > > > > > > cvt_type = get_related_vectype_for_scalar_type (TYPE_MODE > > > (vectype_in), cvt_type, TYPE_VECTOR_SUBPARTS (vectype_in)); > > > > > > > + /* This should only happened for SLP as long as loop > > > > vectorizer > > > > + only supports same-sized vector. */ > > > > + if (cvt_type == NULL_TREE > > > > + || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nelts) > > > > + || !supportable_convert_operation ((tree_code) *code1, > > > > + vectype_out, > > > > + cvt_type, &tc1) > > > > + || !supportable_convert_operation ((tree_code) *code2, > > > > + cvt_type, > > > > + vectype_in, &tc2)) > > > > + continue; > > > > + > > > > + found_mode = true; > > > > + break; > > > > + } > > > > + > > > > + if (found_mode) > > > > + { > > > > + (*multi_step_cvt)++; > > > > + interm_types->safe_push (cvt_type); > > > > + cvt_type = NULL_TREE; > > > > + *code1 = tc1; > > > > + *code2 = tc2; > > > > + return true; > > > > + } > > > > + } > > > > + interm_types->release (); > > > > > > Hmm, ownership of interm_types is somewhat unclear here - the caller > > > should > > > release it, or is the situation that the caller is confused by stray > > > elements in it? > In > > > that case I'd suggest to instead do interm_types->truncate (0). > > > > > > > It's my fault, I just imitate supportable_narrowing/widening_operation, > > I think for this function, interm_types->release() is not needed.