> -----Original Message-----
> From: Richard Biener <rguent...@suse.de>
> Sent: Monday, June 3, 2024 5:03 PM
> To: Hu, Lin1 <lin1...@intel.com>
> Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao....@intel.com>;
> ubiz...@gmail.com
> Subject: RE: [PATCH 1/3] vect: generate suitable convert insn for int -> int, 
> float
> -> float and int <-> float.
> 
> On Mon, 3 Jun 2024, Hu, Lin1 wrote:
> 
> > > -----Original Message-----
> > > From: Richard Biener <rguent...@suse.de>
> > > Sent: Friday, May 31, 2024 8:41 PM
> > > To: Hu, Lin1 <lin1...@intel.com>
> > > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao....@intel.com>;
> > > ubiz...@gmail.com
> > > Subject: RE: [PATCH 1/3] vect: generate suitable convert insn for
> > > int -> int, float
> > > -> float and int <-> float.
> > >
> > > On Fri, 31 May 2024, Hu, Lin1 wrote:
> > >
> > > > > -----Original Message-----
> > > > > From: Richard Biener <rguent...@suse.de>
> > > > > Sent: Wednesday, May 29, 2024 5:41 PM
> > > > > To: Hu, Lin1 <lin1...@intel.com>
> > > > > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao
> > > > > <hongtao....@intel.com>; ubiz...@gmail.com
> > > > > Subject: Re: [PATCH 1/3] vect: generate suitable convert insn
> > > > > for int -> int, float
> > > > > -> float and int <-> float.
> > > > >
> > > > > On Thu, 23 May 2024, Hu, Lin1 wrote:
> > > > >
> > > > > > gcc/ChangeLog:
> > > > > >
> > > > > >     PR target/107432
> > > > > >     * tree-vect-generic.cc
> > > > > >     (supportable_indirect_narrowing_operation): New function for
> > > > > >     support indirect narrowing convert.
> > > > > >     (supportable_indirect_widening_operation): New function for
> > > > > >     support indirect widening convert.
> > > > > >     (expand_vector_conversion): Support convert for int -> int,
> > > > > >     float -> float and int <-> float.
> > > > > >
> > > > > > gcc/testsuite/ChangeLog:
> > > > > >
> > > > > >     PR target/107432
> > > > > >     * gcc.target/i386/pr107432-1.c: New test.
> > > > > >     * gcc.target/i386/pr107432-2.c: Ditto.
> > > > > >     * gcc.target/i386/pr107432-3.c: Ditto.
> > > > > >     * gcc.target/i386/pr107432-4.c: Ditto.
> > > > > >     * gcc.target/i386/pr107432-5.c: Ditto.
> > > > > >     * gcc.target/i386/pr107432-6.c: Ditto.
> > > > > >     * gcc.target/i386/pr107432-7.c: Ditto.
> > > > > > ---
> > > > > > diff --git a/gcc/tree-vect-generic.cc
> > > > > > b/gcc/tree-vect-generic.cc index
> > > > > > ab640096ca2..0bedb53d9f9 100644
> > > > > > --- a/gcc/tree-vect-generic.cc
> > > > > > +++ b/gcc/tree-vect-generic.cc
> > > > > > @@ -45,6 +45,8 @@ along with GCC; see the file COPYING3.  If
> > > > > > not see #include "gimple-match.h"
> > > > > >  #include "recog.h"         /* FIXME: for insn_data */
> > > > > >  #include "optabs-libfuncs.h"
> > > > > > +#include "cfgloop.h"
> > > > > > +#include "tree-vectorizer.h"
> > > > > >
> > > > > >
> > > > > >  /* Build a ternary operation and gimplify it.  Emit code before 
> > > > > > GSI.
> > > > > > @@ -1834,6 +1836,142 @@ do_vec_narrow_conversion
> > > > > (gimple_stmt_iterator *gsi, tree inner_type, tree a,
> > > > > >    return gimplify_build2 (gsi, code, outer_type, b, c);  }
> > > > > >
> > > > > > +/* A subroutine of expand_vector_conversion, support indirect
> > > > > > +conversion
> > > > > for
> > > > > > +   float <-> int, like double -> char.  */ bool
> > > > > > +supportable_indirect_narrowing_operation (gimple_stmt_iterator 
> > > > > > *gsi,
> > > > > > +                                    enum tree_code code,
> > > > > > +                                    tree lhs,
> > > > > > +                                    tree arg)
> > > > > > +{
> > > > > > +  gimple *g;
> > > > > > +  tree ret_type = TREE_TYPE (lhs);
> > > > > > +  tree arg_type = TREE_TYPE (arg);
> > > > > > +  tree new_rhs;
> > > > > > +
> > > > > > +  unsigned int ret_elt_bits = vector_element_bits (ret_type);
> > > > > > + unsigned int arg_elt_bits = vector_element_bits (arg_type);
> > > > > > + if (code != FIX_TRUNC_EXPR || flag_trapping_math ||
> > > > > > + ret_elt_bits >=
> > > > > arg_elt_bits)
> > > > > > +    return false;
> > > > > > +
> > > > > > +  unsigned short target_size;  scalar_mode tmp_cvt_mode;
> > > > > > + scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE
> > > > > > + (ret_type)); scalar_mode rhs_mode = GET_MODE_INNER
> > > > > > + (TYPE_MODE (arg_type)); tree cvt_type = NULL_TREE;
> > > > > > + tmp_cvt_mode = lhs_mode; target_size = GET_MODE_SIZE
> > > > > > + (rhs_mode);
> > > > > > +
> > > > > > +  opt_scalar_mode mode_iter;
> > > > > > +  enum tree_code tc1, tc2;
> > > > > > +  unsigned HOST_WIDE_INT nelts
> > > > > > +    = constant_lower_bound (TYPE_VECTOR_SUBPARTS (arg_type));
> > > > > > +
> > > > > > +  FOR_EACH_2XWIDER_MODE (mode_iter, tmp_cvt_mode)
> > > > > > +    {
> > > > > > +      tmp_cvt_mode = mode_iter.require ();
> > > > > > +
> > > > > > +      if (GET_MODE_SIZE (tmp_cvt_mode) > target_size)
> > > > > > +   break;
> > > > > > +
> > > > > > +      scalar_mode cvt_mode;
> > > > > > +      int tmp_cvt_size = GET_MODE_BITSIZE (tmp_cvt_mode);
> > > > > > +      if (!int_mode_for_size (tmp_cvt_size, 0).exists (&cvt_mode))
> > > > > > +   break;
> > > > > > +
> > > > > > +      int cvt_size = GET_MODE_BITSIZE (cvt_mode);
> > > > > > +      bool isUnsigned = TYPE_UNSIGNED (ret_type) ||
> > > > > > + TYPE_UNSIGNED
> > > > > (arg_type);
> > > > > > +      cvt_type = build_nonstandard_integer_type (cvt_size,
> > > > > > + isUnsigned);
> > > > > > +
> > > > > > +      cvt_type = build_vector_type (cvt_type, nelts);
> > > > > > +      if (cvt_type == NULL_TREE
> > > > > > +     || !supportable_convert_operation ((tree_code) NOP_EXPR,
> > > > > > +                                        ret_type,
> > > > > > +                                        cvt_type, &tc1)
> > > > > > +     || !supportable_convert_operation ((tree_code) code,
> > > > > > +                                        cvt_type,
> > > > > > +                                        arg_type, &tc2))
> > > > > > +   continue;
> > > > > > +
> > > > > > +      new_rhs = make_ssa_name (cvt_type);
> > > > > > +      g = vect_gimple_build (new_rhs, tc2, arg);
> > > > > > +      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> > > > > > +      g = gimple_build_assign (lhs, tc1, new_rhs);
> > > > > > +      gsi_replace (gsi, g, false);
> > > > > > +      return true;
> > > > > > +    }
> > > > > > +  return false;
> > > > > > +}
> > > > > > +
> > > > > > +/* A subroutine of expand_vector_conversion, support indirect
> > > > > > +conversion
> > > > > for
> > > > > > +   float <-> int, like char -> double.  */ bool
> > > > > > +supportable_indirect_widening_operation (gimple_stmt_iterator *gsi,
> > > > > > +                                    enum tree_code code,
> > > > > > +                                    tree lhs,
> > > > > > +                                    tree arg)
> > > > > > +{
> > > > > > +  gimple *g;
> > > > > > +  tree ret_type = TREE_TYPE (lhs);
> > > > > > +  tree arg_type = TREE_TYPE (arg);
> > > > > > +  tree new_rhs;
> > > > > > +
> > > > > > +  unsigned int ret_elt_bits = vector_element_bits (ret_type);
> > > > > > + unsigned int arg_elt_bits = vector_element_bits (arg_type);
> > > > > > + if (ret_elt_bits <= arg_elt_bits || code != FLOAT_EXPR)
> > > > > > +    return false;
> > > > > > +
> > > > > > +  unsigned short target_size;  scalar_mode tmp_cvt_mode;
> > > > > > + scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE
> > > > > > + (ret_type)); scalar_mode rhs_mode = GET_MODE_INNER
> > > > > > + (TYPE_MODE (arg_type)); tree cvt_type = NULL_TREE;
> > > > > > + target_size = GET_MODE_SIZE (lhs_mode);  int rhs_size =
> > > > > > + GET_MODE_BITSIZE (rhs_mode);  if (!int_mode_for_size (rhs_size,
> 0).exists (&tmp_cvt_mode))
> > > > > > +    return false;
> > > > > > +
> > > > > > +  opt_scalar_mode mode_iter;
> > > > > > +  enum tree_code tc1, tc2;
> > > > > > +  unsigned HOST_WIDE_INT nelts
> > > > > > +    = constant_lower_bound (TYPE_VECTOR_SUBPARTS (arg_type));
> > > > > > +
> > > > > > +  FOR_EACH_2XWIDER_MODE (mode_iter, tmp_cvt_mode)
> > > > > > +    {
> > > > > > +      tmp_cvt_mode = mode_iter.require ();
> > > > > > +
> > > > > > +      if (GET_MODE_SIZE (tmp_cvt_mode) > target_size)
> > > > > > +   break;
> > > > > > +
> > > > > > +      scalar_mode cvt_mode;
> > > > > > +      int tmp_cvt_size = GET_MODE_BITSIZE (tmp_cvt_mode);
> > > > > > +      if (!int_mode_for_size (tmp_cvt_size, 0).exists (&cvt_mode))
> > > > > > +   break;
> > > > > > +
> > > > > > +      int cvt_size = GET_MODE_BITSIZE (cvt_mode);
> > > > > > +      bool isUnsigned = TYPE_UNSIGNED (ret_type) ||
> > > > > > + TYPE_UNSIGNED
> > > > > (arg_type);
> > > > > > +      cvt_type = build_nonstandard_integer_type (cvt_size,
> > > > > > + isUnsigned);
> > > > > > +
> > > > > > +      cvt_type = build_vector_type (cvt_type, nelts);
> > > > > > +      if (cvt_type == NULL_TREE
> > > > > > +     || !supportable_convert_operation ((tree_code) code,
> > > > > > +                                        ret_type,
> > > > > > +                                        cvt_type, &tc1)
> > > > > > +     || !supportable_convert_operation ((tree_code) NOP_EXPR,
> > > > > > +                                        cvt_type,
> > > > > > +                                        arg_type, &tc2))
> > > > > > +   continue;
> > > > > > +
> > > > > > +      new_rhs = make_ssa_name (cvt_type);
> > > > > > +      g = vect_gimple_build (new_rhs, tc2, arg);
> > > > > > +      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> > > > > > +      g = gimple_build_assign (lhs, tc1, new_rhs);
> > > > > > +      gsi_replace (gsi, g, false);
> > > > > > +      return true;
> > > > > > +    }
> > > > > > +  return false;
> > > > > > +}
> > > > > > +
> > > > >
> > > > > So the above improve the situation where the target can handle
> > > > > the two-step conversion.  It doesn't really allow this to work
> > > > > for too large vectors AFAICS (nor does it try pack/unpack for
> > > > > any of the conversions).  It also still duplicates code that's
> > > > > in the vectorizer.  I think you should be able to use
> > > > > supportable_narrowing_operation and possibly even
> > > > > supportable_widening_operation (though that needs refatoring to
> > > > > avoid the vectorizer internal stmt_vec_info type - possibly
> > > > > simply by gating
> > > the respective code on a non-NULL vinfo).  Both support multi-step
> conversions.
> > > > >
> > > >
> > > > I tried to use supportable_narrowing_operation and I met two questions:
> > > >
> > > > 1) supportable_narrowing_operation support v2df->v16qi, but I don't know
> > > >    which optab can help me convert v16qi to v2qi.
> > >
> > > It's API is a bit tricky but for v2df -> v2qi (I expect you'll have
> > > an equal number of lanes in/out for .CONVERT_VECTOR) it likely
> > > outputs a multi-step conversion where you have to look into
> > > *INTERM_TYPES and second-guess the operation code to use for the
> > > intermediate steps (IIRC the intermediate steps all use either PACK/UNPACK
> or CONVERT, never FLOAT/FIX).
> > >
> >
> > I made a mistake in what I said before. I think
> > supportable_narrowing_operation doesn't support v2df->v2qi, it only
> > use VEC_PACK_TRUNC_EXPRT in its intermediate steps. This makes it
> > require that vectype_in and vectype_out have the same size to return
> > true. I want to make sure I'm doing the right thing, I can build a
> > tmp_type by build_nonstandard_integer_type and get_same_sized_vectype.
> > And use tree_vec_extract to extract v2qi from v16qi after
> > supportable_narrowing_operation.
> 
> Yes.  It looks like the vectorizer, when the vector types number of lanes 
> agree
> goes the 'NONE' conversion path, checks supportable_convert_operation and
> then has open-coded handling for
> 
>       /* For conversions between float and integer types try whether
>          we can use intermediate signed integer types to support the
>          conversion.  */
> 
> that means I was wrong in indicating supportable_narrowing_operation was for
> element narrowing, it is for number-of-lane "narrowing".
> 
> That said, vectorizable_conversion, in the NONE case has handling that should
> be split out into a function that's usable also from vector lowering then so 
> that
> both vectorization and lowering handle the same cases.  The interface would be
> similar to supportable_narrowing_operation.
> 
> > >
> > > > 2) I tried a testcase (https://godbolt.org/z/z88xYW85e), this result is
> > > >    not what I expected, because it only use vec_pack_trunc. I expect it
> > > >    can use vcvttpd2dq + vpmovdw.
> > >
> > > With -O3 -fno-tree-loop-vectorize that's what you get.  What you see
> > > is because of the restriction of the loop vectorizer to work on a single 
> > > vector
> size only.
> > >
> >
> > Yes, it works, but the program runs the NONE part
> > (tree-vect-stmts.cc:5357) instead of the NARROW_DST part
> > (tree-vect-stmts.cc:5545). I think maybe we can wrap the part of the
> > code from line:5373 to line:5455 as a function. This avoids
> > duplicating the wheel, and I get the results I'm looking for.
> 
> Yeah.
> 
> > In addition to wrapping the function. If you are motivated by the fact
> > that our modifications are not generalized enough, I think we can add
> > supportable_narrowing/widening_operation after the current single step
> > VEC_CONVERT (line 1972 and line 2078). It should try to use a single
> > step and then use multiple steps. If you agree, I'd like to remove my
> > changes about indirect conversions for now, and keep only the direct
> > conversions, so that I can merge the three current patches into the
> > trunk first, and then add the change about indirect conversions later.
> 
> I think it should go like finding the largest compute_vectype pair
> (source/destination) that we can handle either directly or indirectly via the 
> new
> function.
> 
> Richard.
> 

Thanks, I will wrap the code in the new function and put out a new version of 
this patch. I have a small question, what does "finding the largest 
compute_vectype pair" mean? Some piece of code from gcc?

BRs,
Lin

>
> >
> > >
> > > > If I can solve the first question and the function be better
> > > > (maybe support trunc<vectype_in><vectype_out>), I'd be happy to
> > > > use it directly. I prefer my scheme for now. My functions is more
> > > > like supportable_convert_operation. Perhaps, we can modify
> > > > supportable_narrowing_operation, but I think it should be another
> > > > patch, it will influence vectorizer.
> > >
> > > But since you are doing a multi-step conversion this is really what
> > > supportable_narrowing_operation is about.  I don't think we want to
> > > re-invent the wheel here.  Likewise your approach won't get you to
> > > use VEC_[UN]PACK_HI/LO/EVEN/ODD either (supported by the current
> > > single- step .CONVERT_VECTOR lowering).
> > > supportable_narrowing_operation also checks for this.
> > >
> > > Richard.
> > >
> > >
> > > > BRs,
> > > > Lin
> > > >
> > > > >
> > > > > >  /* Expand VEC_CONVERT ifn call.  */
> > > > > >
> > > > > >  static void
> > > > > > @@ -1871,14 +2009,21 @@ expand_vector_conversion
> > > > > (gimple_stmt_iterator *gsi)
> > > > > >    else if (ret_elt_bits > arg_elt_bits)
> > > > > >      modifier = WIDEN;
> > > > > >
> > > > > > +  if (supportable_convert_operation (code, ret_type, arg_type,
> &code1))
> > > > > > +    {
> > > > > > +      g = gimple_build_assign (lhs, code1, arg);
> > > > > > +      gsi_replace (gsi, g, false);
> > > > > > +      return;
> > > > > > +    }
> > > > > > +
> > > > > > +  if (supportable_indirect_narrowing_operation(gsi, code, lhs, 
> > > > > > arg))
> > > > > > +    return;
> > > > > > +
> > > > > > +  if (supportable_indirect_widening_operation(gsi, code, lhs, arg))
> > > > > > +    return;
> > > > > > +
> > > > > >    if (modifier == NONE && (code == FIX_TRUNC_EXPR || code ==
> > > > > FLOAT_EXPR))
> > > > > >      {
> > > > > > -      if (supportable_convert_operation (code, ret_type, arg_type,
> &code1))
> > > > > > -   {
> > > > > > -     g = gimple_build_assign (lhs, code1, arg);
> > > > > > -     gsi_replace (gsi, g, false);
> > > > > > -     return;
> > > > > > -   }
> > > > > >        /* Can't use get_compute_type here, as
> > > supportable_convert_operation
> > > > > >      doesn't necessarily use an optab and needs two arguments.  */
> > > > > >        tree vec_compute_type
> > > > > >
> > > > >
> > > > > --
> > > > > Richard Biener <rguent...@suse.de> SUSE Software Solutions
> > > > > Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany;
> > > > > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG
> > > > > Nuernberg)
> > > >
> > >
> > > --
> > > Richard Biener <rguent...@suse.de>
> > > SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461
> > > Nuernberg, Germany;
> > > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG
> > > Nuernberg)
> >
> 
> --
> Richard Biener <rguent...@suse.de>
> SUSE Software Solutions Germany GmbH,
> Frankenstrasse 146, 90461 Nuernberg, Germany;
> GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to