Oluwatamilore Adebayo <oluwatamilore.adeb...@arm.com> writes: > From: oluade01 <oluwatamilore.adeb...@arm.com> > > This updates vect_recog_abd_pattern to recognize the widening > variant of absolute difference (ABDL, ABDL2). > > gcc/ChangeLog: > > * internal-fn.cc (widening_fn_p, decomposes_to_hilo_fn_p): > Add IFN_VEC_WIDEN_ABD to the switch statement. > * internal-fn.def (VEC_WIDEN_ABD): New internal hilo optab. > * optabs.def (vec_widen_sabd_optab, > vec_widen_sabd_hi_optab, vec_widen_sabd_lo_optab, > vec_widen_sabd_odd_optab, vec_widen_sabd_even_optab, > vec_widen_uabd_optab, > vec_widen_uabd_hi_optab, vec_widen_uabd_lo_optab, > vec_widen_uabd_odd_optab, vec_widen_uabd_even_optab): > New optabs. > * tree-vect-patterns.cc (vect_recog_abd_pattern): Update to > build a VEC_WIDEN_ABD call if the input precision is smaller > than the precision of the output. > (vect_recog_widen_abd_pattern): Should an ABD expression be > found preceding an extension, replace the two with a > VEC_WIDEN_ABD. > --- > gcc/doc/md.texi | 11 ++ > gcc/internal-fn.def | 5 + > gcc/optabs.def | 10 ++ > gcc/tree-vect-patterns.cc | 205 +++++++++++++++++++++++++++++--------- > 4 files changed, 183 insertions(+), 48 deletions(-) > > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi > index > e11b10d2fca11016232921bc85e47975f700e6c6..2ae6182b925d0cf8950dc830d083cf93baf2eaa1 > 100644 > --- a/gcc/doc/md.texi > +++ b/gcc/doc/md.texi > @@ -5617,6 +5617,17 @@ signed/unsigned elements of size S@. Subtract the > high/low elements of 2 from > 1 and widen the resulting elements. Put the N/2 results of size 2*S in the > output vector (operand 0). 
> > +@cindex @code{vec_widen_sabdl_hi_@var{m}} instruction pattern > +@cindex @code{vec_widen_sabdl_lo_@var{m}} instruction pattern > +@cindex @code{vec_widen_uabdl_hi_@var{m}} instruction pattern > +@cindex @code{vec_widen_uabdl_lo_@var{m}} instruction pattern > +@item @samp{vec_widen_uabdl_hi_@var{m}}, @samp{vec_widen_uabdl_lo_@var{m}} > +@itemx @samp{vec_widen_sabdl_hi_@var{m}}, @samp{vec_widen_sabdl_lo_@var{m}}
The optabs don't have the trailing “l” (long). (Which is a good thing!) The list should include the even/odd patterns as well. > +Signed/Unsigned widening absolute difference long. Operands 1 and 2 are Similarly no “long” here. > +vectors with N signed/unsigned elements of size S@. Find the absolute > +difference between 1 and 2 and widen the resulting elements. Put the N/2 Maybe “operands 1 and 2”, or just “them”. > +results of size 2*S in the output vector (operand 0). > + > @cindex @code{vec_addsub@var{m}3} instruction pattern > @item @samp{vec_addsub@var{m}3} > Alternating subtract, add with even lanes doing subtract and odd > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > index > 116965f4830cec8f60642ff011a86b6562e2c509..d67274d68b49943a88c531e903fd03b42343ab97 > 100644 > --- a/gcc/internal-fn.def > +++ b/gcc/internal-fn.def > @@ -352,6 +352,11 @@ DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_MINUS, > first, > vec_widen_ssub, vec_widen_usub, > binary) > +DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_ABD, > + ECF_CONST | ECF_NOTHROW, > + first, > + vec_widen_sabd, vec_widen_uabd, > + binary) > DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary) > DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary) > > diff --git a/gcc/optabs.def b/gcc/optabs.def > index > 35b835a6ac56d72417dac8ddfd77a8a7e2475e65..68dfa1550f791a2fe833012157601ecfa68f1e09 > 100644 > --- a/gcc/optabs.def > +++ b/gcc/optabs.def > @@ -418,6 +418,11 @@ OPTAB_D (vec_widen_sadd_hi_optab, "vec_widen_sadd_hi_$a") > OPTAB_D (vec_widen_sadd_lo_optab, "vec_widen_sadd_lo_$a") > OPTAB_D (vec_widen_sadd_odd_optab, "vec_widen_sadd_odd_$a") > OPTAB_D (vec_widen_sadd_even_optab, "vec_widen_sadd_even_$a") > +OPTAB_D (vec_widen_sabd_optab, "vec_widen_sabd_$a") > +OPTAB_D (vec_widen_sabd_hi_optab, "vec_widen_sabd_hi_$a") > +OPTAB_D (vec_widen_sabd_lo_optab, "vec_widen_sabd_lo_$a") > +OPTAB_D (vec_widen_sabd_odd_optab, "vec_widen_sabd_odd_$a") > +OPTAB_D 
(vec_widen_sabd_even_optab, "vec_widen_sabd_even_$a") > OPTAB_D (vec_widen_sshiftl_hi_optab, "vec_widen_sshiftl_hi_$a") > OPTAB_D (vec_widen_sshiftl_lo_optab, "vec_widen_sshiftl_lo_$a") > OPTAB_D (vec_widen_umult_even_optab, "vec_widen_umult_even_$a") > @@ -436,6 +441,11 @@ OPTAB_D (vec_widen_uadd_hi_optab, "vec_widen_uadd_hi_$a") > OPTAB_D (vec_widen_uadd_lo_optab, "vec_widen_uadd_lo_$a") > OPTAB_D (vec_widen_uadd_odd_optab, "vec_widen_uadd_odd_$a") > OPTAB_D (vec_widen_uadd_even_optab, "vec_widen_uadd_even_$a") > +OPTAB_D (vec_widen_uabd_optab, "vec_widen_uabd_$a") > +OPTAB_D (vec_widen_uabd_hi_optab, "vec_widen_uabd_hi_$a") > +OPTAB_D (vec_widen_uabd_lo_optab, "vec_widen_uabd_lo_$a") > +OPTAB_D (vec_widen_uabd_odd_optab, "vec_widen_uabd_odd_$a") > +OPTAB_D (vec_widen_uabd_even_optab, "vec_widen_uabd_even_$a") > OPTAB_D (vec_addsub_optab, "vec_addsub$a3") > OPTAB_D (vec_fmaddsub_optab, "vec_fmaddsub$a4") > OPTAB_D (vec_fmsubadd_optab, "vec_fmsubadd$a4") > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc > index > e2392113bff4065c909aefc760b4c48978b73a5a..281d7bc2e9945ee415be051f5ec1cce19251fbbf > 100644 > --- a/gcc/tree-vect-patterns.cc > +++ b/gcc/tree-vect-patterns.cc > @@ -1404,15 +1404,28 @@ vect_recog_sad_pattern (vec_info *vinfo, > gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt); > if (!abd_stmt > || !gimple_call_internal_p (abd_stmt) > - || gimple_call_internal_fn (abd_stmt) != IFN_ABD) > + || gimple_call_num_args (abd_stmt) != 2) > return NULL; > > tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0); > tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1); > > - if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, > &unprom[0]) > - || !vect_look_through_possible_promotion (vinfo, abd_oprnd1, > - &unprom[1])) > + if (gimple_call_internal_fn (abd_stmt) == IFN_ABD) > + { > + if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, > + &unprom[0]) > + || !vect_look_through_possible_promotion (vinfo, abd_oprnd1, > + 
&unprom[1])) > + return NULL; > + } > + else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD) > + { > + unprom[0].op = abd_oprnd0; > + unprom[0].type = TREE_TYPE (abd_oprnd0); > + unprom[1].op = abd_oprnd1; > + unprom[1].type = TREE_TYPE (abd_oprnd1); > + } > + else > return NULL; > > half_type = unprom[0].type; > @@ -1442,16 +1455,19 @@ vect_recog_sad_pattern (vec_info *vinfo, > > /* Function vect_recog_abd_pattern > > - Try to find the following ABsolute Difference (ABD) pattern: > + Try to find the following ABsolute Difference (ABD) or > + widening ABD (WIDEN_ABD) pattern: > > - VTYPE x, y, out; > - type diff; > - loop i in range: > - S1 diff = x[i] - y[i] > - S2 out[i] = ABS_EXPR <diff>; > + TYPE1 x; > + TYPE2 y; > + TYPE3 x_cast = (TYPE3) x; // widening or no-op > + TYPE3 y_cast = (TYPE3) y; // widening or no-op > + TYPE3 diff = x_cast - y_cast; > + TYPE4 diff_cast = (TYPE4) diff; // widening or no-op > + TYPE5 abs = ABS(U)_EXPR <diff_cast>; > > - where 'type' is a integer and 'VTYPE' is a vector of integers > - the same size as 'type' > + WIDEN_ABD exists to optimize the case where WTYPE is at least > + twice as wide as VTYPE. Sorry, my fault. I was using the original type names in this suggestion, rather than the TYPE1…TYPE5 ones. Should be: WIDEN_ABD exists to optimize the case where TYPE4 is at least twice as wide as TYPE3. > > Input: > > @@ -1459,30 +1475,18 @@ vect_recog_sad_pattern (vec_info *vinfo, > > Output: > > - * TYPE_out: The type of the output of this pattern > + * TYPE_OUT: The type of the output of this pattern > > * Return value: A new stmt that will be used to replace the sequence of > - stmts that constitute the pattern; either SABD or UABD: > - SABD_EXPR<x, y, out> > - UABD_EXPR<x, y, out> > + stmts that constitute the pattern; either SABD, UABD, SABDL or UABDL: > + IFN_ABD<x, y, out> > + IFN_WIDEN_ABD<x, y, out> Lingering use of “L” suffixes here. 
Maybe: stmts that constitute the pattern, principally: out = IFN_ABD (x, y) out = IFN_WIDEN_ABD (x, y) > */ > > static gimple * > vect_recog_abd_pattern (vec_info *vinfo, > stmt_vec_info stmt_vinfo, tree *type_out) > { > - /* Look for the following patterns > - X = x[i] > - Y = y[i] > - DIFF = X - Y > - DAD = ABS_EXPR<DIFF> > - out[i] = DAD > - > - In which > - - X, Y, DIFF, DAD all have the same type > - - x, y, out are all vectors of the same type > - */ > - > gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo)); > if (!last_stmt) > return NULL; > @@ -1496,54 +1500,83 @@ vect_recog_abd_pattern (vec_info *vinfo, > unprom, &diff_stmt)) > return NULL; > > - tree abd_type = out_type, vectype; > - tree abd_oprnds[2]; > - bool extend = false; > + tree abd_in_type, abd_out_type; > + > if (half_type) > { > - vectype = get_vectype_for_scalar_type (vinfo, half_type); > - abd_type = half_type; > - extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type); > + abd_in_type = half_type; > + abd_out_type = abd_in_type; > } > else > { > unprom[0].op = gimple_assign_rhs1 (diff_stmt); > unprom[1].op = gimple_assign_rhs2 (diff_stmt); > - tree signed_out = signed_type_for (out_type); > - vectype = get_vectype_for_scalar_type (vinfo, signed_out); > + abd_in_type = signed_type_for (out_type); > + abd_out_type = abd_in_type; > } > > - vect_pattern_detected ("vect_recog_abd_pattern", last_stmt); > + tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type); > + if (!vectype_in) > + return NULL; > > - if (!vectype > - || !direct_internal_fn_supported_p (IFN_ABD, vectype, > + internal_fn ifn = IFN_ABD; > + tree vectype_out = vectype_in; > + > + if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2 > + && TYPE_PRECISION (abd_out_type) != stmt_vinfo->min_output_precision) Sorry for not noticing last time, but I think the second condition would be more natural as: && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2) (There's 
no distinction between abd_in_type and abd_out_type at this point, so it seems clearer to use the same value in both conditions.) > + { > + tree mid_type > + = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2, > + TYPE_UNSIGNED (abd_in_type)); > + tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type); > + > + code_helper dummy_code; > + int dummy_int; > + auto_vec<tree> dummy_vec; > + if (mid_vectype > + && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, > + stmt_vinfo, mid_vectype, > + vectype_in, > + &dummy_code, &dummy_code, > + &dummy_int, &dummy_vec)) > + { > + ifn = IFN_VEC_WIDEN_ABD; > + abd_out_type = mid_type; > + vectype_out = mid_vectype; > + } > + } > + > + if (ifn == IFN_ABD > + && !direct_internal_fn_supported_p (ifn, vectype_in, > OPTIMIZE_FOR_SPEED)) > return NULL; > > + vect_pattern_detected ("vect_recog_abd_pattern", last_stmt); > + > + tree abd_oprnds[2]; > vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds, > - TREE_TYPE (vectype), unprom, vectype); > + abd_in_type, unprom, vectype_in); > > *type_out = get_vectype_for_scalar_type (vinfo, out_type); > > - tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL); > - gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2, > + tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL); > + gcall *abd_stmt = gimple_build_call_internal (ifn, 2, > abd_oprnds[0], abd_oprnds[1]); > gimple_call_set_lhs (abd_stmt, abd_result); > gimple_set_location (abd_stmt, gimple_location (last_stmt)); > > - if (!extend) > - return abd_stmt; > - > gimple *stmt = abd_stmt; > - if (!TYPE_UNSIGNED (abd_type)) > + if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type) > + && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type) > + && !TYPE_UNSIGNED (abd_out_type)) > { > - tree unsign = unsigned_type_for (abd_type); > + tree unsign = unsigned_type_for (abd_out_type); > tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign); > stmt = 
vect_convert_output (vinfo, stmt_vinfo, unsign, stmt, > unsign_vectype); > } > > - return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype); > + return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, > vectype_out); > } > > /* Recognize an operation that performs ORIG_CODE on widened inputs, > @@ -1703,6 +1736,81 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, > stmt_vec_info last_stmt_info, > &subtype); > } > > +/* Try to detect abd on widened inputs, converting IFN_ABD > + to IFN_VEC_WIDEN_ABD. */ > +static gimple * > +vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo, > + tree *type_out) > +{ > + gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo)); > + if (!last_stmt || !gimple_assign_cast_p (last_stmt)) I think this should be: if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt))) gimple_assign_cast_p is more general, and allows conversions between integral and non-integral types. > + return NULL; > + > + tree last_rhs = gimple_assign_rhs1 (last_stmt); > + > + tree in_type = TREE_TYPE (last_rhs); > + tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt)); > + if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)) > + return NULL; I think this also needs to require TYPE_UNSIGNED (in_type): if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type) || !TYPE_UNSIGNED (in_type)) return NULL; That is, the extension has to be a zero extension rather than a sign extension. For example: int32_t a, b, c; int64_t d; c = IFN_ABD (a, b); d = (int64_t) c; sign-extends the ABD result to 64 bits, and so a == INT_MAX && b == INT_MIN gives: c = -1 (UINT_MAX converted to signed) d = -1 But IFN_WIDEN_ABD would give d == UINT_MAX instead. 
> + > + stmt_vec_info abs_vinfo = vect_get_internal_def (vinfo, last_rhs); > + if (!abs_vinfo) > + return NULL; > + > + stmt_vec_info abd_pattern_vinfo = STMT_VINFO_RELATED_STMT (abs_vinfo); > + if (!abd_pattern_vinfo) > + return NULL; > + > + gimple *pattern_stmt = STMT_VINFO_STMT (abd_pattern_vinfo); > + if (gimple_assign_cast_p (pattern_stmt)) > + { > + tree op = gimple_assign_rhs1 (pattern_stmt); > + vect_unpromoted_value unprom; > + op = vect_look_through_possible_promotion (vinfo, op, &unprom); > + > + if (!op) > + return NULL; > + > + abd_pattern_vinfo = vect_get_internal_def (vinfo, op); > + if (!abd_pattern_vinfo) > + return NULL; > + > + pattern_stmt = STMT_VINFO_STMT (abd_pattern_vinfo); > + } I think the code quoted above reduces to: vect_unpromoted_value unprom; tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom); if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type)) return NULL; stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op); if (!abd_pattern_vinfo) return NULL; abd_pattern_vinfo = vect_stmt_to_vectorize (abd_pattern_vinfo); > + > + gcall *abd_stmt = dyn_cast <gcall *> (pattern_stmt); > + if (!abd_stmt || gimple_call_internal_fn (abd_stmt) != IFN_ABD) > + return NULL; > + > + tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0); > + tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1); > + if (TYPE_PRECISION (TREE_TYPE (abd_oprnd0)) != TYPE_PRECISION (in_type)) > + return NULL; With the changes above, this check would not be necessary. LGTM otherwise, thanks. 
Richard > + > + tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type); > + tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type); > + > + code_helper dummy_code; > + int dummy_int; > + auto_vec<tree> dummy_vec; > + if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo, > + vectype_out, vectype_in, > + &dummy_code, &dummy_code, > + &dummy_int, &dummy_vec)) > + return NULL; > + > + vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt); > + > + *type_out = vectype_out; > + > + tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL); > + gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2, > + abd_oprnd0, abd_oprnd1); > + gimple_call_set_lhs (widen_abd_stmt, widen_abd_result); > + gimple_set_location (widen_abd_stmt, gimple_location (last_stmt)); > + return widen_abd_stmt; > +} > + > /* Function vect_recog_ctz_ffs_pattern > > Try to find the following pattern: > @@ -6670,6 +6778,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = { > { vect_recog_mask_conversion_pattern, "mask_conversion" }, > { vect_recog_widen_plus_pattern, "widen_plus" }, > { vect_recog_widen_minus_pattern, "widen_minus" }, > + { vect_recog_widen_abd_pattern, "widen_abd" }, > /* These must come after the double widening ones. */ > };