https://github.com/DavidTruby updated https://github.com/llvm/llvm-project/pull/119718
>From af135a1a4156be9076931ceeb92f75171a0cb7ad Mon Sep 17 00:00:00 2001 From: David Truby <david.tr...@arm.com> Date: Thu, 12 Dec 2024 14:50:19 +0000 Subject: [PATCH] [flang] Add -f[no-]vectorize flags --- clang/include/clang/Driver/Options.td | 11 +++++-- clang/lib/Driver/ToolChains/Clang.cpp | 33 ------------------- clang/lib/Driver/ToolChains/CommonArgs.cpp | 33 +++++++++++++++++++ clang/lib/Driver/ToolChains/CommonArgs.h | 2 ++ clang/lib/Driver/ToolChains/Flang.cpp | 10 ++++++ .../include/flang/Frontend/CodeGenOptions.def | 1 + flang/lib/Frontend/CompilerInvocation.cpp | 4 +++ flang/lib/Frontend/FrontendActions.cpp | 2 ++ flang/test/Driver/optimization-remark.f90 | 22 ++++++------- flang/test/Integration/unroll-loops.f90 | 4 +-- flang/test/Lower/HLFIR/unroll-loops.fir | 4 +-- 11 files changed, 75 insertions(+), 51 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 5ad187926e710..6824ffb8828d4 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3986,11 +3986,15 @@ defm assumptions : BoolFOption<"assumptions", "Disable codegen and compile-time checks for C++23's [[assume]] attribute">, PosFlag<SetTrue>>; + +let Visibility = [ClangOption, FlangOption] in { def fvectorize : Flag<["-"], "fvectorize">, Group<f_Group>, HelpText<"Enable the loop vectorization passes">; def fno_vectorize : Flag<["-"], "fno-vectorize">, Group<f_Group>; def : Flag<["-"], "ftree-vectorize">, Alias<fvectorize>; def : Flag<["-"], "fno-tree-vectorize">, Alias<fno_vectorize>; +} + def fslp_vectorize : Flag<["-"], "fslp-vectorize">, Group<f_Group>, HelpText<"Enable the superword-level parallelism vectorization passes">; def fno_slp_vectorize : Flag<["-"], "fno-slp-vectorize">, Group<f_Group>; @@ -7332,6 +7336,10 @@ def mlink_builtin_bitcode : Separate<["-"], "mlink-builtin-bitcode">, def mlink_bitcode_file : Separate<["-"], "mlink-bitcode-file">, HelpText<"Link the given bitcode file before 
performing optimizations.">; + +def vectorize_loops : Flag<["-"], "vectorize-loops">, + HelpText<"Run the Loop vectorization passes">, + MarshallingInfoFlag<CodeGenOpts<"VectorizeLoop">>; } // let Visibility = [CC1Option, FC1Option] let Visibility = [CC1Option] in { @@ -7447,9 +7455,6 @@ defm link_builtin_bitcode_postopt: BoolMOption<"link-builtin-bitcode-postopt", PosFlag<SetTrue, [], [ClangOption], "Link builtin bitcodes after the " "optimization pipeline">, NegFlag<SetFalse, [], [ClangOption]>>; -def vectorize_loops : Flag<["-"], "vectorize-loops">, - HelpText<"Run the Loop vectorization passes">, - MarshallingInfoFlag<CodeGenOpts<"VectorizeLoop">>; def vectorize_slp : Flag<["-"], "vectorize-slp">, HelpText<"Run the SLP vectorization passes">, MarshallingInfoFlag<CodeGenOpts<"VectorizeSLP">>; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 96af466e067a8..7c50970068fa9 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -511,39 +511,6 @@ static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args, } } -/// Vectorize at all optimization levels greater than 1 except for -Oz. -/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is -/// enabled. -static bool shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) { - if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { - if (A->getOption().matches(options::OPT_O4) || - A->getOption().matches(options::OPT_Ofast)) - return true; - - if (A->getOption().matches(options::OPT_O0)) - return false; - - assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); - - // Vectorize -Os. - StringRef S(A->getValue()); - if (S == "s") - return true; - - // Don't vectorize -Oz, unless it's the slp vectorizer. 
- if (S == "z") - return isSlpVec; - - unsigned OptLevel = 0; - if (S.getAsInteger(10, OptLevel)) - return false; - - return OptLevel > 1; - } - - return false; -} - /// Add -x lang to \p CmdArgs for \p Input. static void addDashXForInput(const ArgList &Args, const InputInfo &Input, ArgStringList &CmdArgs) { diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 2d01943ca1ac4..1a2299a92c54e 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -3133,3 +3133,36 @@ void tools::renderCommonIntegerOverflowOptions(const ArgList &Args, if (use_fwrapv_pointer) CmdArgs.push_back("-fwrapv-pointer"); } + +/// Vectorize at all optimization levels greater than 1 except for -Oz. +/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is +/// enabled. +bool tools::shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) { + if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { + if (A->getOption().matches(options::OPT_O4) || + A->getOption().matches(options::OPT_Ofast)) + return true; + + if (A->getOption().matches(options::OPT_O0)) + return false; + + assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); + + // Vectorize -Os. + StringRef S(A->getValue()); + if (S == "s") + return true; + + // Don't vectorize -Oz, unless it's the slp vectorizer. 
+ if (S == "z") + return isSlpVec; + + unsigned OptLevel = 0; + if (S.getAsInteger(10, OptLevel)) + return false; + + return OptLevel > 1; + } + + return false; +} diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h index b6ddd99b87279..783a1f834b33d 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.h +++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -265,6 +265,8 @@ bool shouldRecordCommandLine(const ToolChain &TC, void renderCommonIntegerOverflowOptions(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs); +bool shouldEnableVectorizerAtOLevel(const llvm::opt::ArgList &Args, + bool isSlpVec); } // end namespace tools } // end namespace driver } // end namespace clang diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 9ad795edd724d..45c26cf8c3159 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -149,6 +149,16 @@ void Flang::addCodegenOptions(const ArgList &Args, !stackArrays->getOption().matches(options::OPT_fno_stack_arrays)) CmdArgs.push_back("-fstack-arrays"); + // Enable vectorization per default according to the optimization level + // selected. For optimization levels that want vectorization we use the alias + // option to simplify the hasFlag logic. + bool enableVec = shouldEnableVectorizerAtOLevel(Args, false); + OptSpecifier vectorizeAliasOption = + enableVec ? 
options::OPT_O_Group : options::OPT_fvectorize; + if (Args.hasFlag(options::OPT_fvectorize, vectorizeAliasOption, + options::OPT_fno_vectorize, enableVec)) + CmdArgs.push_back("-vectorize-loops"); + if (shouldLoopVersion(Args)) CmdArgs.push_back("-fversion-loops-for-stride"); diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def index deb8d1aede518..44cb5a2cdd497 100644 --- a/flang/include/flang/Frontend/CodeGenOptions.def +++ b/flang/include/flang/Frontend/CodeGenOptions.def @@ -31,6 +31,7 @@ CODEGENOPT(PrepareForFullLTO , 1, 0) ///< Set when -flto is enabled on the CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the ///< compile step. CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass) +CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization. CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning. CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index f3d9432c62d3b..724316f1b30c7 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -23,6 +23,7 @@ #include "clang/Basic/AllDiagnostics.h" #include "clang/Basic/DiagnosticDriver.h" #include "clang/Basic/DiagnosticOptions.h" +#include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/OptionUtils.h" #include "clang/Driver/Options.h" @@ -242,6 +243,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, clang::driver::options::OPT_fno_stack_arrays, false)) opts.StackArrays = 1; + if (args.getLastArg(clang::driver::options::OPT_vectorize_loops)) + opts.VectorizeLoop = 1; + if (args.hasFlag(clang::driver::options::OPT_floop_versioning, clang::driver::options::OPT_fno_loop_versioning, false)) opts.LoopVersioning = 1; diff 
--git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 763c810ace0eb..76d329d043731 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -1037,6 +1037,8 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) { si.getTimePasses().setOutStream(ci.getTimingStreamLLVM()); pto.LoopUnrolling = opts.UnrollLoops; pto.LoopInterleaving = opts.UnrollLoops; + pto.LoopVectorization = opts.VectorizeLoop; + llvm::PassBuilder pb(targetMachine, pto, pgoOpt, &pic); // Attempt to load pass plugins and register their callbacks with PB. diff --git a/flang/test/Driver/optimization-remark.f90 b/flang/test/Driver/optimization-remark.f90 index e90baa892f46a..90e310d36c807 100644 --- a/flang/test/Driver/optimization-remark.f90 +++ b/flang/test/Driver/optimization-remark.f90 @@ -5,33 +5,33 @@ ! DEFINE: %{output} = -emit-llvm -flang-deprecated-no-hlfir -o /dev/null 2>&1 ! Check fc1 can handle -Rpass -! RUN: %flang_fc1 %s -O1 -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS +! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS ! Check that we can override -Rpass= with -Rno-pass. -! RUN: %flang_fc1 %s -O1 -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS ! Check -Rno-pass, -Rno-pass-analysis, -Rno-pass-missed nothing emitted -! RUN: %flang %s -O1 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS -! RUN: %flang %s -O1 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS -! RUN: %flang %s -O1 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang %s -O2 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! 
RUN: %flang %s -O2 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang %s -O2 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS ! Check valid -Rpass regex -! RUN: %flang %s -O1 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY +! RUN: %flang %s -O2 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY ! Check valid -Rpass-missed regex -! RUN: %flang %s -O1 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY +! RUN: %flang %s -O2 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY ! Check valid -Rpass-analysis regex -! RUN: %flang %s -O1 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY +! RUN: %flang %s -O2 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY ! Check full -Rpass message is emitted -! RUN: %flang %s -O1 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS +! RUN: %flang %s -O2 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS ! Check full -Rpass-missed message is emitted -! RUN: %flang %s -O1 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED +! RUN: %flang %s -O2 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED ! Check full -Rpass-analysis message is emitted -! RUN: %flang %s -O1 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS +! RUN: %flang %s -O2 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS ! REMARKS: remark: ! NO-REMARKS-NOT: remark: diff --git a/flang/test/Integration/unroll-loops.f90 b/flang/test/Integration/unroll-loops.f90 index c3fcf1c3a7cf3..debe45e0ec359 100644 --- a/flang/test/Integration/unroll-loops.f90 +++ b/flang/test/Integration/unroll-loops.f90 @@ -1,8 +1,8 @@ ! 
FIXME: https://github.com/llvm/llvm-project/issues/123668 ! ! DEFINE: %{triple} = -! DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL -! DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL +! DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL +! DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL ! ! REDEFINE: %{triple} = aarch64-unknown-linux-gnu ! RUN: %if aarch64-registered-target %{ %{check-unroll} %} diff --git a/flang/test/Lower/HLFIR/unroll-loops.fir b/flang/test/Lower/HLFIR/unroll-loops.fir index 6a9dd28a37b6d..1321f39677405 100644 --- a/flang/test/Lower/HLFIR/unroll-loops.fir +++ b/flang/test/Lower/HLFIR/unroll-loops.fir @@ -1,6 +1,6 @@ // DEFINE: %{triple} = -// DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL -// DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL +// DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL +// DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL // REDEFINE: %{triple} = aarch64-unknown-linux-gnu // RUN: %if aarch64-registered-target %{ %{check-unroll} %} 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits