On Mon, Aug 14, 2023 at 10:40 AM Hongtao Liu <crazy...@gmail.com> wrote: > > On Fri, Aug 11, 2023 at 2:02 PM liuhongt via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > Rename original use_gather to use_gather_8parts, Support > > -mtune-ctrl={,^}use_gather to set/clear tune features > > use_gather_{2parts, 4parts, 8parts}. Support the new option -mgather > > as alias of -mtune-ctrl=, use_gather, ^use_gather. > > > > Similar for use_scatter. > > > > How about this version? > I'll commit the patch if there's no objections in the next 24 hours. Pushed to trunk and backported to release/gcc-{13,12,11}. Note for GCC11, the backport patch only supports -m{no,}gather since the branch doesn't have scatter tunings. For GCC12/GCC13, both -m{no,}gather/scatter are supported. > > > > gcc/ChangeLog: > > > > * config/i386/i386-builtins.cc > > (ix86_vectorize_builtin_gather): Adjust for use_gather_8parts. > > * config/i386/i386-options.cc (parse_mtune_ctrl_str): > > Set/Clear tune features use_{gather,scatter}_{2parts, 4parts, > > 8parts} for -mtune-ctrl={,^}{use_gather,use_scatter}. > > * config/i386/i386.cc (ix86_vectorize_builtin_scatter): Adjust > > for use_scatter_8parts > > * config/i386/i386.h (TARGET_USE_GATHER): Rename to .. > > (TARGET_USE_GATHER_8PARTS): .. this. > > (TARGET_USE_SCATTER): Rename to .. > > (TARGET_USE_SCATTER_8PARTS): .. this. > > * config/i386/x86-tune.def (X86_TUNE_USE_GATHER): Rename to > > (X86_TUNE_USE_GATHER_8PARTS): .. this. > > (X86_TUNE_USE_SCATTER): Rename to > > (X86_TUNE_USE_SCATTER_8PARTS): .. this. > > * config/i386/i386.opt: Add new options mgather, mscatter. 
> > --- > > gcc/config/i386/i386-builtins.cc | 2 +- > > gcc/config/i386/i386-options.cc | 54 +++++++++++++++++++++++--------- > > gcc/config/i386/i386.cc | 2 +- > > gcc/config/i386/i386.h | 8 ++--- > > gcc/config/i386/i386.opt | 8 +++++ > > gcc/config/i386/x86-tune.def | 4 +-- > > 6 files changed, 56 insertions(+), 22 deletions(-) > > > > diff --git a/gcc/config/i386/i386-builtins.cc > > b/gcc/config/i386/i386-builtins.cc > > index 356b6dfd5fb..8a0b8dfe073 100644 > > --- a/gcc/config/i386/i386-builtins.cc > > +++ b/gcc/config/i386/i386-builtins.cc > > @@ -1657,7 +1657,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype, > > ? !TARGET_USE_GATHER_2PARTS > > : (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 4u) > > ? !TARGET_USE_GATHER_4PARTS > > - : !TARGET_USE_GATHER))) > > + : !TARGET_USE_GATHER_8PARTS))) > > return NULL_TREE; > > > > if ((TREE_CODE (index_type) != INTEGER_TYPE > > diff --git a/gcc/config/i386/i386-options.cc > > b/gcc/config/i386/i386-options.cc > > index 127ee24203c..b8d038af69d 100644 > > --- a/gcc/config/i386/i386-options.cc > > +++ b/gcc/config/i386/i386-options.cc > > @@ -1731,20 +1731,46 @@ parse_mtune_ctrl_str (struct gcc_options *opts, > > bool dump) > > curr_feature_string++; > > clear = true; > > } > > - for (i = 0; i < X86_TUNE_LAST; i++) > > - { > > - if (!strcmp (curr_feature_string, ix86_tune_feature_names[i])) > > - { > > - ix86_tune_features[i] = !clear; > > - if (dump) > > - fprintf (stderr, "Explicitly %s feature %s\n", > > - clear ? "clear" : "set", > > ix86_tune_feature_names[i]); > > - break; > > - } > > - } > > - if (i == X86_TUNE_LAST) > > - error ("unknown parameter to option %<-mtune-ctrl%>: %s", > > - clear ? 
curr_feature_string - 1 : curr_feature_string); > > + > > + if (!strcmp (curr_feature_string, "use_gather")) > > + { > > + ix86_tune_features[X86_TUNE_USE_GATHER_2PARTS] = !clear; > > + ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS] = !clear; > > + ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS] = !clear; > > + if (dump) > > + fprintf (stderr, "Explicitly %s features use_gather_2parts," > > + " use_gather_4parts, use_gather_8parts\n", > > + clear ? "clear" : "set"); > > + > > + } > > + else if (!strcmp (curr_feature_string, "use_scatter")) > > + { > > + ix86_tune_features[X86_TUNE_USE_SCATTER_2PARTS] = !clear; > > + ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS] = !clear; > > + ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS] = !clear; > > + if (dump) > > + fprintf (stderr, "Explicitly %s features use_scatter_2parts," > > + " use_scatter_4parts, use_scatter_8parts\n", > > + clear ? "clear" : "set"); > > + } > > + else > > + { > > + for (i = 0; i < X86_TUNE_LAST; i++) > > + { > > + if (!strcmp (curr_feature_string, ix86_tune_feature_names[i])) > > + { > > + ix86_tune_features[i] = !clear; > > + if (dump) > > + fprintf (stderr, "Explicitly %s feature %s\n", > > + clear ? "clear" : "set", > > ix86_tune_feature_names[i]); > > + break; > > + } > > + } > > + > > + if (i == X86_TUNE_LAST) > > + error ("unknown parameter to option %<-mtune-ctrl%>: %s", > > + clear ? curr_feature_string - 1 : curr_feature_string); > > + } > > curr_feature_string = next_feature_string; > > } > > while (curr_feature_string); > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > > index d592ece700a..cd49fb9e47a 100644 > > --- a/gcc/config/i386/i386.cc > > +++ b/gcc/config/i386/i386.cc > > @@ -19193,7 +19193,7 @@ ix86_vectorize_builtin_scatter (const_tree vectype, > > ? !TARGET_USE_SCATTER_2PARTS > > : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u) > > ? 
!TARGET_USE_SCATTER_4PARTS > > - : !TARGET_USE_SCATTER)) > > + : !TARGET_USE_SCATTER_8PARTS)) > > return NULL_TREE; > > > > if ((TREE_CODE (index_type) != INTEGER_TYPE > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > > index ef342fcee9b..f7330e818e7 100644 > > --- a/gcc/config/i386/i386.h > > +++ b/gcc/config/i386/i386.h > > @@ -403,10 +403,10 @@ extern unsigned char > > ix86_tune_features[X86_TUNE_LAST]; > > ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS] > > #define TARGET_USE_SCATTER_4PARTS \ > > ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS] > > -#define TARGET_USE_GATHER \ > > - ix86_tune_features[X86_TUNE_USE_GATHER] > > -#define TARGET_USE_SCATTER \ > > - ix86_tune_features[X86_TUNE_USE_SCATTER] > > +#define TARGET_USE_GATHER_8PARTS \ > > + ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS] > > +#define TARGET_USE_SCATTER_8PARTS \ > > + ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS] > > #define TARGET_FUSE_CMP_AND_BRANCH_32 \ > > ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_32] > > #define TARGET_FUSE_CMP_AND_BRANCH_64 \ > > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt > > index 8a43187f703..78b499304a4 100644 > > --- a/gcc/config/i386/i386.opt > > +++ b/gcc/config/i386/i386.opt > > @@ -1302,3 +1302,11 @@ msm4 > > Target Mask(ISA2_SM4) Var(ix86_isa_flags2) Save > > Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and > > SM4 built-in functions and code generation. > > + > > +mgather > > +Target Alias(mtune-ctrl=, use_gather, ^use_gather) > > +Enable vectorization for gather instruction. > > + > > +mscatter > > +Target Alias(mtune-ctrl=, use_scatter, ^use_scatter) > > +Enable vectorization for scatter instruction. 
> > diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def > > index 40e04ecddbf..d7f20d3a118 100644 > > --- a/gcc/config/i386/x86-tune.def > > +++ b/gcc/config/i386/x86-tune.def > > @@ -511,13 +511,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, > > "use_scatter_4parts", > > > > /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more > > elements. */ > > -DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather", > > +DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts", > > ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE | m_ARROWLAKE > > | m_CORE_ATOM | m_GENERIC)) > > > > /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more > > elements. */ > > -DEF_TUNE (X86_TUNE_USE_SCATTER, "use_scatter", > > +DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts", > > ~(m_ZNVER4)) > > > > /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or > > -- > > 2.31.1 > > > > > -- > BR, > Hongtao
-- BR, Hongtao