On Mon, Aug 14, 2023 at 10:40 AM Hongtao Liu <crazy...@gmail.com> wrote:
>
> On Fri, Aug 11, 2023 at 2:02 PM liuhongt via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Rename original use_gather to use_gather_8parts. Support
> > -mtune-ctrl={,^}use_gather to set/clear tune features
> > use_gather_{2parts, 4parts, 8parts}. Support the new option -mgather
> > as an alias of -mtune-ctrl=, use_gather, ^use_gather.
> >
> > Similar for use_scatter.
> >
> > How about this version?
> I'll commit the patch if there are no objections in the next 24 hours.
Pushed to trunk and backported to release/gcc-{13,12,11}.
Note for GCC11: the backport patch only supports -m{no-,}gather since
the branch doesn't have scatter tunings.
For GCC12/GCC13, both -m{no-,}gather and -m{no-,}scatter are supported.
> >
> > gcc/ChangeLog:
> >
> >         * config/i386/i386-builtins.cc
> >         (ix86_vectorize_builtin_gather): Adjust for use_gather_8parts.
> >         * config/i386/i386-options.cc (parse_mtune_ctrl_str):
> >         Set/Clear tune features use_{gather,scatter}_{2parts, 4parts,
> >         8parts} for -mtune-ctrl={,^}{use_gather,use_scatter}.
> >         * config/i386/i386.cc (ix86_vectorize_builtin_scatter): Adjust
> >         for use_scatter_8parts.
> >         * config/i386/i386.h (TARGET_USE_GATHER): Rename to ..
> >         (TARGET_USE_GATHER_8PARTS): .. this.
> >         (TARGET_USE_SCATTER): Rename to ..
> >         (TARGET_USE_SCATTER_8PARTS): .. this.
> >         * config/i386/x86-tune.def (X86_TUNE_USE_GATHER): Rename to
> >         (X86_TUNE_USE_GATHER_8PARTS): .. this.
> >         (X86_TUNE_USE_SCATTER): Rename to
> >         (X86_TUNE_USE_SCATTER_8PARTS): .. this.
> >         * config/i386/i386.opt: Add new options mgather, mscatter.
> > ---
> >  gcc/config/i386/i386-builtins.cc |  2 +-
> >  gcc/config/i386/i386-options.cc  | 54 +++++++++++++++++++++++---------
> >  gcc/config/i386/i386.cc          |  2 +-
> >  gcc/config/i386/i386.h           |  8 ++---
> >  gcc/config/i386/i386.opt         |  8 +++++
> >  gcc/config/i386/x86-tune.def     |  4 +--
> >  6 files changed, 56 insertions(+), 22 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386-builtins.cc 
> > b/gcc/config/i386/i386-builtins.cc
> > index 356b6dfd5fb..8a0b8dfe073 100644
> > --- a/gcc/config/i386/i386-builtins.cc
> > +++ b/gcc/config/i386/i386-builtins.cc
> > @@ -1657,7 +1657,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
> >           ? !TARGET_USE_GATHER_2PARTS
> >           : (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 4u)
> >              ? !TARGET_USE_GATHER_4PARTS
> > -            : !TARGET_USE_GATHER)))
> > +            : !TARGET_USE_GATHER_8PARTS)))
> >      return NULL_TREE;
> >
> >    if ((TREE_CODE (index_type) != INTEGER_TYPE
> > diff --git a/gcc/config/i386/i386-options.cc 
> > b/gcc/config/i386/i386-options.cc
> > index 127ee24203c..b8d038af69d 100644
> > --- a/gcc/config/i386/i386-options.cc
> > +++ b/gcc/config/i386/i386-options.cc
> > @@ -1731,20 +1731,46 @@ parse_mtune_ctrl_str (struct gcc_options *opts, 
> > bool dump)
> >            curr_feature_string++;
> >            clear = true;
> >          }
> > -      for (i = 0; i < X86_TUNE_LAST; i++)
> > -        {
> > -          if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
> > -            {
> > -              ix86_tune_features[i] = !clear;
> > -              if (dump)
> > -                fprintf (stderr, "Explicitly %s feature %s\n",
> > -                         clear ? "clear" : "set", 
> > ix86_tune_feature_names[i]);
> > -              break;
> > -            }
> > -        }
> > -      if (i == X86_TUNE_LAST)
> > -       error ("unknown parameter to option %<-mtune-ctrl%>: %s",
> > -              clear ? curr_feature_string - 1 : curr_feature_string);
> > +
> > +      if (!strcmp (curr_feature_string, "use_gather"))
> > +       {
> > +         ix86_tune_features[X86_TUNE_USE_GATHER_2PARTS] = !clear;
> > +         ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS] = !clear;
> > +         ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS] = !clear;
> > +         if (dump)
> > +           fprintf (stderr, "Explicitly %s features use_gather_2parts,"
> > +                    " use_gather_4parts, use_gather_8parts\n",
> > +                    clear ? "clear" : "set");
> > +
> > +       }
> > +      else if (!strcmp (curr_feature_string, "use_scatter"))
> > +       {
> > +         ix86_tune_features[X86_TUNE_USE_SCATTER_2PARTS] = !clear;
> > +         ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS] = !clear;
> > +         ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS] = !clear;
> > +         if (dump)
> > +           fprintf (stderr, "Explicitly %s features use_scatter_2parts,"
> > +                    " use_scatter_4parts, use_scatter_8parts\n",
> > +                    clear ? "clear" : "set");
> > +       }
> > +      else
> > +       {
> > +         for (i = 0; i < X86_TUNE_LAST; i++)
> > +           {
> > +             if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
> > +               {
> > +                 ix86_tune_features[i] = !clear;
> > +                 if (dump)
> > +                   fprintf (stderr, "Explicitly %s feature %s\n",
> > +                            clear ? "clear" : "set", 
> > ix86_tune_feature_names[i]);
> > +                 break;
> > +               }
> > +           }
> > +
> > +         if (i == X86_TUNE_LAST)
> > +           error ("unknown parameter to option %<-mtune-ctrl%>: %s",
> > +                  clear ? curr_feature_string - 1 : curr_feature_string);
> > +       }
> >        curr_feature_string = next_feature_string;
> >      }
> >    while (curr_feature_string);
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index d592ece700a..cd49fb9e47a 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -19193,7 +19193,7 @@ ix86_vectorize_builtin_scatter (const_tree vectype,
> >        ? !TARGET_USE_SCATTER_2PARTS
> >        : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
> >          ? !TARGET_USE_SCATTER_4PARTS
> > -        : !TARGET_USE_SCATTER))
> > +        : !TARGET_USE_SCATTER_8PARTS))
> >      return NULL_TREE;
> >
> >    if ((TREE_CODE (index_type) != INTEGER_TYPE
> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> > index ef342fcee9b..f7330e818e7 100644
> > --- a/gcc/config/i386/i386.h
> > +++ b/gcc/config/i386/i386.h
> > @@ -403,10 +403,10 @@ extern unsigned char 
> > ix86_tune_features[X86_TUNE_LAST];
> >         ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS]
> >  #define TARGET_USE_SCATTER_4PARTS \
> >         ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS]
> > -#define TARGET_USE_GATHER \
> > -       ix86_tune_features[X86_TUNE_USE_GATHER]
> > -#define TARGET_USE_SCATTER \
> > -       ix86_tune_features[X86_TUNE_USE_SCATTER]
> > +#define TARGET_USE_GATHER_8PARTS \
> > +       ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS]
> > +#define TARGET_USE_SCATTER_8PARTS \
> > +       ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS]
> >  #define TARGET_FUSE_CMP_AND_BRANCH_32 \
> >         ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_32]
> >  #define TARGET_FUSE_CMP_AND_BRANCH_64 \
> > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> > index 8a43187f703..78b499304a4 100644
> > --- a/gcc/config/i386/i386.opt
> > +++ b/gcc/config/i386/i386.opt
> > @@ -1302,3 +1302,11 @@ msm4
> >  Target Mask(ISA2_SM4) Var(ix86_isa_flags2) Save
> >  Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and
> >  SM4 built-in functions and code generation.
> > +
> > +mgather
> > +Target Alias(mtune-ctrl=, use_gather, ^use_gather)
> > +Enable vectorization for gather instruction.
> > +
> > +mscatter
> > +Target Alias(mtune-ctrl=, use_scatter, ^use_scatter)
> > +Enable vectorization for scatter instruction.
> > diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
> > index 40e04ecddbf..d7f20d3a118 100644
> > --- a/gcc/config/i386/x86-tune.def
> > +++ b/gcc/config/i386/x86-tune.def
> > @@ -511,13 +511,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, 
> > "use_scatter_4parts",
> >
> >  /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
> >     elements.  */
> > -DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
> > +DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts",
> >           ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE | m_ARROWLAKE
> >             | m_CORE_ATOM | m_GENERIC))
> >
> >  /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
> >     elements.  */
> > -DEF_TUNE (X86_TUNE_USE_SCATTER, "use_scatter",
> > +DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
> >           ~(m_ZNVER4))
> >
> >  /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
> > --
> > 2.31.1
> >
>
>
> --
> BR,
> Hongtao



-- 
BR,
Hongtao

Reply via email to