https://github.com/DavidTruby created https://github.com/llvm/llvm-project/pull/119718
This patch adds the -fvectorize and -fno-vectorize flags to flang. Note that this also changes the behaviour of `flang -fc1` to match that of `clang -cc1`, which is that vectorization is only enabled in the presence of the `-vectorize-loops` flag. Additionally, this patch changes the behaviour of the default optimisation levels to match clang, such that vectorization only happens at the same levels as it does there. This patch is in draft while I write an RFC to discuss the above two changes. >From 0b9cbef8073cdfc511087c315deb82a90c408640 Mon Sep 17 00:00:00 2001 From: David Truby <david.tr...@arm.com> Date: Thu, 12 Dec 2024 14:50:19 +0000 Subject: [PATCH] [flang] Add -f[no-]vectorize flags --- clang/include/clang/Driver/Driver.h | 3 ++ clang/include/clang/Driver/Options.td | 11 ++++-- clang/lib/Driver/Driver.cpp | 34 +++++++++++++++++++ clang/lib/Driver/ToolChains/Clang.cpp | 33 ------------------ clang/lib/Driver/ToolChains/Flang.cpp | 10 ++++++ .../include/flang/Frontend/CodeGenOptions.def | 1 + flang/lib/Frontend/CompilerInvocation.cpp | 4 +++ flang/lib/Frontend/FrontendActions.cpp | 3 ++ flang/test/Driver/optimization-remark.f90 | 22 ++++++------ 9 files changed, 74 insertions(+), 47 deletions(-) diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index c23d037e725bb9..4c4375b25902c4 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -856,6 +856,9 @@ void applyOverrideOptions(SmallVectorImpl<const char *> &Args, llvm::StringSet<> &SavedStrings, raw_ostream *OS = nullptr); +bool shouldEnableVectorizerAtOLevel(const llvm::opt::ArgList &Args, + bool isSlpVec); + } // end namespace driver } // end namespace clang diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 88862ae9edb29d..bc3c1e4a0045ff 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4038,11 +4038,15 @@ defm assumptions : BoolFOption<"assumptions", "Disable codegen and compile-time checks for C++23's [[assume]] attribute">, PosFlag<SetTrue>>; + +let Visibility = [ClangOption, FlangOption] in { def fvectorize : Flag<["-"], "fvectorize">, Group<f_Group>, HelpText<"Enable the loop vectorization passes">; def fno_vectorize : Flag<["-"], "fno-vectorize">, Group<f_Group>; def : Flag<["-"], "ftree-vectorize">, Alias<fvectorize>; def : Flag<["-"], "fno-tree-vectorize">, Alias<fno_vectorize>; +} + def fslp_vectorize : Flag<["-"], "fslp-vectorize">, Group<f_Group>, HelpText<"Enable the superword-level parallelism vectorization passes">; def fno_slp_vectorize : Flag<["-"], "fno-slp-vectorize">, Group<f_Group>; @@ -7323,6 +7327,10 @@ let Visibility = [CC1Option, FC1Option] in { def mlink_builtin_bitcode : Separate<["-"], "mlink-builtin-bitcode">, HelpText<"Link and internalize needed symbols from the given bitcode file " "before performing optimizations.">; + +def vectorize_loops : Flag<["-"], "vectorize-loops">, + HelpText<"Run the Loop vectorization passes">, + MarshallingInfoFlag<CodeGenOpts<"VectorizeLoop">>; } // let Visibility = [CC1Option, FC1Option] let Visibility = [CC1Option] in { @@ -7439,9 +7447,6 @@ defm link_builtin_bitcode_postopt: BoolMOption<"link-builtin-bitcode-postopt", PosFlag<SetTrue, [], [ClangOption], "Link builtin bitcodes after the " "optimization pipeline">, NegFlag<SetFalse, [], [ClangOption]>>; -def vectorize_loops : Flag<["-"], "vectorize-loops">, - HelpText<"Run the Loop vectorization passes">, - MarshallingInfoFlag<CodeGenOpts<"VectorizeLoop">>; def vectorize_slp : Flag<["-"], "vectorize-slp">, HelpText<"Run the SLP vectorization passes">, MarshallingInfoFlag<CodeGenOpts<"VectorizeSLP">>; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index fb73b62cf2daed..1510af0047fc47 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6980,3 +6980,37 @@ void driver::applyOverrideOptions(SmallVectorImpl<const char *> &Args, ++S; } } + +/// Vectorize at all optimization levels greater than 1 except for -Oz. +/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is +/// enabled. +bool driver::shouldEnableVectorizerAtOLevel(const ArgList &Args, + bool isSlpVec) { + if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { + if (A->getOption().matches(options::OPT_O4) || + A->getOption().matches(options::OPT_Ofast)) + return true; + + if (A->getOption().matches(options::OPT_O0)) + return false; + + assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); + + // Vectorize -Os. + StringRef S(A->getValue()); + if (S == "s") + return true; + + // Don't vectorize -Oz, unless it's the slp vectorizer. + if (S == "z") + return isSlpVec; + + unsigned OptLevel = 0; + if (S.getAsInteger(10, OptLevel)) + return false; + + return OptLevel > 1; + } + + return false; +} diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index d3206c3e8e25ed..69fdec4e74e08a 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -503,39 +503,6 @@ static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args, } } -/// Vectorize at all optimization levels greater than 1 except for -Oz. -/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is -/// enabled. -static bool shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) { - if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { - if (A->getOption().matches(options::OPT_O4) || - A->getOption().matches(options::OPT_Ofast)) - return true; - - if (A->getOption().matches(options::OPT_O0)) - return false; - - assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); - - // Vectorize -Os. - StringRef S(A->getValue()); - if (S == "s") - return true; - - // Don't vectorize -Oz, unless it's the slp vectorizer. - if (S == "z") - return isSlpVec; - - unsigned OptLevel = 0; - if (S.getAsInteger(10, OptLevel)) - return false; - - return OptLevel > 1; - } - - return false; -} - /// Add -x lang to \p CmdArgs for \p Input. static void addDashXForInput(const ArgList &Args, const InputInfo &Input, ArgStringList &CmdArgs) { diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index c98fdbd157bac8..5b229562583ff5 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -143,6 +143,16 @@ void Flang::addCodegenOptions(const ArgList &Args, !stackArrays->getOption().matches(options::OPT_fno_stack_arrays)) CmdArgs.push_back("-fstack-arrays"); + // Enable vectorization per default according to the optimization level + // selected. For optimization levels that want vectorization we use the alias + // option to simplify the hasFlag logic. + bool enableVec = shouldEnableVectorizerAtOLevel(Args, false); + OptSpecifier vectorizeAliasOption = + enableVec ? options::OPT_O_Group : options::OPT_fvectorize; + if (Args.hasFlag(options::OPT_fvectorize, vectorizeAliasOption, + options::OPT_fno_vectorize, enableVec)) + CmdArgs.push_back("-vectorize-loops"); + if (shouldLoopVersion(Args)) CmdArgs.push_back("-fversion-loops-for-stride"); diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def index 9d03ec88a56b8a..b5d6bc6146e92b 100644 --- a/flang/include/flang/Frontend/CodeGenOptions.def +++ b/flang/include/flang/Frontend/CodeGenOptions.def @@ -31,6 +31,7 @@ CODEGENOPT(PrepareForFullLTO , 1, 0) ///< Set when -flto is enabled on the CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the ///< compile step. CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass) +CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization. CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning. CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 648b88e84051c2..b517e86ca8c9a1 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -23,6 +23,7 @@ #include "clang/Basic/AllDiagnostics.h" #include "clang/Basic/DiagnosticDriver.h" #include "clang/Basic/DiagnosticOptions.h" +#include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/OptionUtils.h" #include "clang/Driver/Options.h" @@ -242,6 +243,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, clang::driver::options::OPT_fno_stack_arrays, false)) opts.StackArrays = 1; + if (args.getLastArg(clang::driver::options::OPT_vectorize_loops)) + opts.VectorizeLoop = 1; + if (args.hasFlag(clang::driver::options::OPT_floop_versioning, clang::driver::options::OPT_fno_loop_versioning, false)) opts.LoopVersioning = 1; diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 77631f70dfd19f..3801408cf7967d 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -982,6 +982,9 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) { llvm::StandardInstrumentations si(llvmModule->getContext(), opts.DebugPassManager); si.registerCallbacks(pic, &mam); + + pto.LoopVectorization = opts.VectorizeLoop; + llvm::PassBuilder pb(targetMachine, pto, pgoOpt, &pic); // Attempt to load pass plugins and register their callbacks with PB. diff --git a/flang/test/Driver/optimization-remark.f90 b/flang/test/Driver/optimization-remark.f90 index e90baa892f46a0..90e310d36c807e 100644 --- a/flang/test/Driver/optimization-remark.f90 +++ b/flang/test/Driver/optimization-remark.f90 @@ -5,33 +5,33 @@ ! DEFINE: %{output} = -emit-llvm -flang-deprecated-no-hlfir -o /dev/null 2>&1 ! Check fc1 can handle -Rpass -! RUN: %flang_fc1 %s -O1 -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS +! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS ! Check that we can override -Rpass= with -Rno-pass. -! RUN: %flang_fc1 %s -O1 -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS ! Check -Rno-pass, -Rno-pass-analysis, -Rno-pass-missed nothing emitted -! RUN: %flang %s -O1 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS -! RUN: %flang %s -O1 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS -! RUN: %flang %s -O1 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang %s -O2 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang %s -O2 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang %s -O2 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS ! Check valid -Rpass regex -! RUN: %flang %s -O1 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY +! RUN: %flang %s -O2 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY ! Check valid -Rpass-missed regex -! RUN: %flang %s -O1 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY +! RUN: %flang %s -O2 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY ! Check valid -Rpass-analysis regex -! RUN: %flang %s -O1 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY +! RUN: %flang %s -O2 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY ! Check full -Rpass message is emitted -! RUN: %flang %s -O1 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS +! RUN: %flang %s -O2 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS ! Check full -Rpass-missed message is emitted -! RUN: %flang %s -O1 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED +! RUN: %flang %s -O2 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED ! Check full -Rpass-analysis message is emitted -! RUN: %flang %s -O1 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS +! RUN: %flang %s -O2 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS ! REMARKS: remark: ! NO-REMARKS-NOT: remark: _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits