https://github.com/DavidTruby updated https://github.com/llvm/llvm-project/pull/119718
>From 0dc613d94560cbe4e8a57eed35d985e9d6dae752 Mon Sep 17 00:00:00 2001 From: David Truby <david.tr...@arm.com> Date: Thu, 12 Dec 2024 14:50:19 +0000 Subject: [PATCH] [flang] Add -f[no-]vectorize flags --- clang/include/clang/Driver/Driver.h | 38 ++--- clang/include/clang/Driver/Options.td | 11 +- clang/lib/Driver/Driver.cpp | 133 +++++++++--------- clang/lib/Driver/ToolChains/Clang.cpp | 33 ----- clang/lib/Driver/ToolChains/CommonArgs.cpp | 87 ++++++++---- clang/lib/Driver/ToolChains/CommonArgs.h | 2 + clang/lib/Driver/ToolChains/Flang.cpp | 10 ++ .../include/flang/Frontend/CodeGenOptions.def | 1 + flang/lib/Frontend/CompilerInvocation.cpp | 4 + flang/lib/Frontend/FrontendActions.cpp | 2 + flang/test/Driver/optimization-remark.f90 | 22 +-- flang/test/Integration/unroll-loops.f90 | 4 +- flang/test/Lower/HLFIR/unroll-loops.fir | 4 +- 13 files changed, 175 insertions(+), 176 deletions(-) diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index f4a52cc529b79cd..bca9cfd85c367cd 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -55,12 +55,7 @@ class JobAction; class ToolChain; /// Describes the kind of LTO mode selected via -f(no-)?lto(=.*)? options. -enum LTOKind { - LTOK_None, - LTOK_Full, - LTOK_Thin, - LTOK_Unknown -}; +enum LTOKind { LTOK_None, LTOK_Full, LTOK_Thin, LTOK_Unknown }; /// Whether headers used to construct C++20 module units should be looked /// up by the path supplied on the command line, or in the user or system @@ -110,17 +105,9 @@ class Driver { DXCMode } Mode; - enum SaveTempsMode { - SaveTempsNone, - SaveTempsCwd, - SaveTempsObj - } SaveTemps; + enum SaveTempsMode { SaveTempsNone, SaveTempsCwd, SaveTempsObj } SaveTemps; - enum BitcodeEmbedMode { - EmbedNone, - EmbedMarker, - EmbedBitcode - } BitcodeEmbed; + enum BitcodeEmbedMode { EmbedNone, EmbedMarker, EmbedBitcode } BitcodeEmbed; enum OffloadMode { OffloadHostDevice, @@ -166,9 +153,7 @@ class Driver { }; // Diag - Forwarding function for diagnostics. - DiagnosticBuilder Diag(unsigned DiagID) const { - return Diags.Report(DiagID); - } + DiagnosticBuilder Diag(unsigned DiagID) const { return Diags.Report(DiagID); } // FIXME: Privatize once interface is stable. public: @@ -404,7 +389,6 @@ class Driver { SmallString<128> &CrashDiagDir); public: - /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. static std::string GetResourcesPath(StringRef BinaryPath); @@ -419,9 +403,7 @@ class Driver { /// Name to use when invoking gcc/g++. const std::string &getCCCGenericGCCName() const { return CCCGenericGCCName; } - llvm::ArrayRef<std::string> getConfigFiles() const { - return ConfigFiles; - } + llvm::ArrayRef<std::string> getConfigFiles() const { return ConfigFiles; } const llvm::opt::OptTable &getOpts() const { return getDriverOptTable(); } @@ -447,9 +429,7 @@ class Driver { std::string getTargetTriple() const { return TargetTriple; } /// Get the path to the main clang executable. - const char *getClangProgramPath() const { - return ClangExecutable.c_str(); - } + const char *getClangProgramPath() const { return ClangExecutable.c_str(); } bool isSaveTempsEnabled() const { return SaveTemps != SaveTempsNone; } bool isSaveTempsObj() const { return SaveTemps == SaveTempsObj; } @@ -561,8 +541,9 @@ class Driver { /// This routine handles additional processing that must be done in addition /// to just running the subprocesses, for example reporting errors, setting /// up response files, removing temporary files, etc. - int ExecuteCompilation(Compilation &C, - SmallVectorImpl< std::pair<int, const Command *> > &FailingCommands); + int ExecuteCompilation( + Compilation &C, + SmallVectorImpl<std::pair<int, const Command *>> &FailingCommands); /// Contains the files in the compilation diagnostic report generated by /// generateCompilationDiagnostics. @@ -758,7 +739,6 @@ class Driver { const CUIDOptions &getCUIDOpts() const { return CUIDOpts; } private: - /// Tries to load options from configuration files. /// /// \returns true if error occurred. diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 77ca2d2aac31be1..3cc9492eac1c200 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3981,11 +3981,15 @@ defm assumptions : BoolFOption<"assumptions", "Disable codegen and compile-time checks for C++23's [[assume]] attribute">, PosFlag<SetTrue>>; + +let Visibility = [ClangOption, FlangOption] in { def fvectorize : Flag<["-"], "fvectorize">, Group<f_Group>, HelpText<"Enable the loop vectorization passes">; def fno_vectorize : Flag<["-"], "fno-vectorize">, Group<f_Group>; def : Flag<["-"], "ftree-vectorize">, Alias<fvectorize>; def : Flag<["-"], "fno-tree-vectorize">, Alias<fno_vectorize>; +} + def fslp_vectorize : Flag<["-"], "fslp-vectorize">, Group<f_Group>, HelpText<"Enable the superword-level parallelism vectorization passes">; def fno_slp_vectorize : Flag<["-"], "fno-slp-vectorize">, Group<f_Group>; @@ -7315,6 +7319,10 @@ def mlink_builtin_bitcode : Separate<["-"], "mlink-builtin-bitcode">, def mlink_bitcode_file : Separate<["-"], "mlink-bitcode-file">, HelpText<"Link the given bitcode file before performing optimizations.">; + +def vectorize_loops : Flag<["-"], "vectorize-loops">, + HelpText<"Run the Loop vectorization passes">, + MarshallingInfoFlag<CodeGenOpts<"VectorizeLoop">>; } // let Visibility = [CC1Option, FC1Option] let Visibility = [CC1Option] in { @@ -7430,9 +7438,6 @@ defm link_builtin_bitcode_postopt: BoolMOption<"link-builtin-bitcode-postopt", PosFlag<SetTrue, [], [ClangOption], "Link builtin bitcodes after the " "optimization pipeline">, NegFlag<SetFalse, [], [ClangOption]>>; -def vectorize_loops : Flag<["-"], "vectorize-loops">, - HelpText<"Run the Loop vectorization passes">, - MarshallingInfoFlag<CodeGenOpts<"VectorizeLoop">>; def vectorize_slp : Flag<["-"], "vectorize-slp">, HelpText<"Run the SLP vectorization passes">, MarshallingInfoFlag<CodeGenOpts<"VectorizeSLP">>; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 5a4737fb381e6a0..dc5424a454513a5 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -398,8 +398,7 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL, if (CCCIsCPP() || (PhaseArg = DAL.getLastArg(options::OPT_E)) || (PhaseArg = DAL.getLastArg(options::OPT__SLASH_EP)) || (PhaseArg = DAL.getLastArg(options::OPT_M, options::OPT_MM)) || - (PhaseArg = DAL.getLastArg(options::OPT__SLASH_P)) || - CCGenDiagnostics) { + (PhaseArg = DAL.getLastArg(options::OPT__SLASH_P)) || CCGenDiagnostics) { FinalPhase = phases::Preprocess; // --precompile only runs up to precompilation. @@ -424,18 +423,18 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL, (PhaseArg = DAL.getLastArg(options::OPT_emit_ast))) { FinalPhase = phases::Compile; - // -S only runs up to the backend. + // -S only runs up to the backend. } else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) { FinalPhase = phases::Backend; - // -c compilation only runs up to the assembler. + // -c compilation only runs up to the assembler. } else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) { FinalPhase = phases::Assemble; } else if ((PhaseArg = DAL.getLastArg(options::OPT_emit_interface_stubs))) { FinalPhase = phases::IfsMerge; - // Otherwise do everything. + // Otherwise do everything. } else FinalPhase = phases::Link; @@ -568,8 +567,7 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const { /// /// This routine provides the logic to compute a target triple from various /// args passed to the driver and the default triple string. -static llvm::Triple computeTargetTriple(const Driver &D, - StringRef TargetTriple, +static llvm::Triple computeTargetTriple(const Driver &D, StringRef TargetTriple, const ArgList &Args, StringRef DarwinArchName = "") { // FIXME: Already done in Compilation *Driver::BuildCompilation @@ -692,8 +690,8 @@ static llvm::Triple computeTargetTriple(const Driver &D, // Handle -miamcu flag. if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) { if (Target.get32BitArchVariant().getArch() != llvm::Triple::x86) - D.Diag(diag::err_drv_unsupported_opt_for_target) << "-miamcu" - << Target.str(); + D.Diag(diag::err_drv_unsupported_opt_for_target) + << "-miamcu" << Target.str(); if (A && !A->getOption().matches(options::OPT_m32)) D.Diag(diag::err_drv_argument_not_allowed_with) @@ -1627,14 +1625,13 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) { if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) { StringRef Name = A->getValue(); unsigned Model = llvm::StringSwitch<unsigned>(Name) - .Case("off", EmbedNone) - .Case("all", EmbedBitcode) - .Case("bitcode", EmbedBitcode) - .Case("marker", EmbedMarker) - .Default(~0U); + .Case("off", EmbedNone) + .Case("all", EmbedBitcode) + .Case("bitcode", EmbedBitcode) + .Case("marker", EmbedMarker) + .Default(~0U); if (Model == ~0U) { - Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) - << Name; + Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } else BitcodeEmbed = static_cast<BitcodeEmbedMode>(Model); } @@ -1863,7 +1860,7 @@ bool Driver::getCrashDiagnosticFile(StringRef ReproCrashFilename, size_t LineEnd = Data.find_first_of("\n", ParentProcPos); if (LineEnd == StringRef::npos) continue; - StringRef ParentProcess = Data.slice(ParentProcPos+15, LineEnd).trim(); + StringRef ParentProcess = Data.slice(ParentProcPos + 15, LineEnd).trim(); int OpenBracket = -1, CloseBracket = -1; for (size_t i = 0, e = ParentProcess.size(); i < e; ++i) { if (ParentProcess[i] == '[') @@ -1876,7 +1873,8 @@ bool Driver::getCrashDiagnosticFile(StringRef ReproCrashFilename, int CrashPID; if (OpenBracket < 0 || CloseBracket < 0 || ParentProcess.slice(OpenBracket + 1, CloseBracket) - .getAsInteger(10, CrashPID) || CrashPID != PID) { + .getAsInteger(10, CrashPID) || + CrashPID != PID) { continue; } @@ -2132,8 +2130,7 @@ void Driver::generateCompilationDiagnostics( CrashDiagDir += "_<YYYY-MM-DD-HHMMSS>_<hostname>.crash"; Diag(clang::diag::note_drv_command_failed_diag_msg) << "Crash backtrace is located in"; - Diag(clang::diag::note_drv_command_failed_diag_msg) - << CrashDiagDir.str(); + Diag(clang::diag::note_drv_command_failed_diag_msg) << CrashDiagDir.str(); Diag(clang::diag::note_drv_command_failed_diag_msg) << "(choose the .crash file that corresponds to your crash)"; } @@ -2245,8 +2242,7 @@ void Driver::PrintHelp(bool ShowHidden) const { std::string Usage = llvm::formatv("{0} [options] file...", Name).str(); getOpts().printHelp(llvm::outs(), Usage.c_str(), DriverTitle.c_str(), - ShowHidden, /*ShowAllAliases=*/false, - VisibilityMask); + ShowHidden, /*ShowAllAliases=*/false, VisibilityMask); } void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const { @@ -2423,11 +2419,11 @@ bool Driver::HandleImmediateArgs(Compilation &C) { if (C.getArgs().hasArg(options::OPT_v)) { if (!SystemConfigDir.empty()) - llvm::errs() << "System configuration file directory: " - << SystemConfigDir << "\n"; + llvm::errs() << "System configuration file directory: " << SystemConfigDir + << "\n"; if (!UserConfigDir.empty()) - llvm::errs() << "User configuration file directory: " - << UserConfigDir << "\n"; + llvm::errs() << "User configuration file directory: " << UserConfigDir + << "\n"; } const ToolChain &TC = C.getDefaultToolChain(); @@ -2506,7 +2502,7 @@ bool Driver::HandleImmediateArgs(Compilation &C) { StringRef ProgName = A->getValue(); // Null program name cannot have a path. - if (! ProgName.empty()) + if (!ProgName.empty()) llvm::outs() << GetProgramPath(ProgName, TC); llvm::outs() << "\n"; @@ -2740,7 +2736,7 @@ void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC, // Add in arch bindings for every top level action, as well as lipo and // dsymutil steps if needed. - for (Action* Act : SingleActions) { + for (Action *Act : SingleActions) { // Make sure we can lipo this kind of output. If not (and it is an actual // output) then we disallow, since we can't create an output file with the // right name without overwriting it. We could remove this oddity by just @@ -2783,7 +2779,7 @@ void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC, // Verify the debug info output. if (Args.hasArg(options::OPT_verify_debug_info)) { - Action* LastAction = Actions.back(); + Action *LastAction = Actions.back(); Actions.pop_back(); Actions.push_back(C.MakeAction<VerifyDebugInfoJobAction>( LastAction, types::TY_Nothing)); @@ -2964,7 +2960,8 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args, Ty = TC.LookupTypeForExtension(Ext + 1); if (Ty == types::TY_INVALID) { - if (IsCLMode() && (Args.hasArgNoClaim(options::OPT_E) || CCGenDiagnostics)) + if (IsCLMode() && + (Args.hasArgNoClaim(options::OPT_E) || CCGenDiagnostics)) Ty = types::TY_CXX; else if (CCCIsCPP() || CCGenDiagnostics) Ty = types::TY_C; @@ -3168,7 +3165,7 @@ class OffloadingActionBuilder final { virtual void appendLinkDeviceActions(ActionList &AL) {} /// Append linker host action generated by the builder. - virtual Action* appendLinkHostActions(ActionList &AL) { return nullptr; } + virtual Action *appendLinkHostActions(ActionList &AL) { return nullptr; } /// Append linker actions generated by the builder. virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {} @@ -3837,15 +3834,15 @@ class OffloadingActionBuilder final { for (auto &LI : DeviceLinkerInputs) { types::ID Output = Args.hasArg(options::OPT_emit_llvm) - ? types::TY_LLVM_BC - : types::TY_Image; + ? types::TY_LLVM_BC + : types::TY_Image; auto *DeviceLinkAction = C.MakeAction<LinkJobAction>(LI, Output); // Linking all inputs for the current GPU arch. // LI contains all the inputs for the linker. OffloadAction::DeviceDependences DeviceLinkDeps; - DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0], - GpuArchList[I], AssociatedOffloadKind); + DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0], GpuArchList[I], + AssociatedOffloadKind); Actions.push_back(C.MakeAction<OffloadAction>( DeviceLinkDeps, DeviceLinkAction->getType())); ++I; @@ -3854,8 +3851,8 @@ class OffloadingActionBuilder final { // If emitting LLVM, do not generate final host/device compilation action if (Args.hasArg(options::OPT_emit_llvm)) { - AL.append(Actions); - return; + AL.append(Actions); + return; } // Create a host object from all the device images by embedding them @@ -3876,7 +3873,7 @@ class OffloadingActionBuilder final { } } - Action* appendLinkHostActions(ActionList &AL) override { return AL.back(); } + Action *appendLinkHostActions(ActionList &AL) override { return AL.back(); } void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {} }; @@ -4118,7 +4115,7 @@ class OffloadingActionBuilder final { return nullptr; // Let builders add host linking actions. - Action* HA = nullptr; + Action *HA = nullptr; for (DeviceActionBuilder *SB : SpecializedBuilders) { if (!SB->isValid()) continue; @@ -4213,7 +4210,8 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args, getOpts().getOption(options::OPT_frtlib_add_rpath)); } // Emitting LLVM while linking disabled except in HIPAMD Toolchain - if (Args.hasArg(options::OPT_emit_llvm) && !Args.hasArg(options::OPT_hip_link)) + if (Args.hasArg(options::OPT_emit_llvm) && + !Args.hasArg(options::OPT_hip_link)) Diag(clang::diag::err_drv_emit_llvm_link); if (C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment() && LTOMode != LTOK_None && @@ -5589,8 +5587,8 @@ class ToolSelector final { continue; } - // This is legal to combine. Append any offload action we found and add the - // current input to preprocessor inputs. + // This is legal to combine. Append any offload action we found and add + // the current input to preprocessor inputs. CollapsedOffloadAction.append(PreprocessJobOffloadActions.begin(), PreprocessJobOffloadActions.end()); NewInputs.append(PJ->input_begin(), PJ->input_end()); @@ -5613,8 +5611,7 @@ class ToolSelector final { /// connected to collapsed actions are updated accordingly. The latter enables /// the caller of the selector to process them afterwards instead of just /// dropping them. If no suitable tool is found, null will be returned. - const Tool *getTool(ActionList &Inputs, - ActionList &CollapsedOffloadAction) { + const Tool *getTool(ActionList &Inputs, ActionList &CollapsedOffloadAction) { // // Get the largest chain of actions that we could combine. // @@ -5657,7 +5654,7 @@ class ToolSelector final { return T; } }; -} +} // namespace /// Return a string that uniquely identifies the result of a job. The bound arch /// is not necessarily represented in the toolchain's triple -- for example, @@ -5828,9 +5825,9 @@ InputInfoList Driver::BuildJobsForActionNoCache( StringRef ArchName = BAA->getArchName(); if (!ArchName.empty()) - TC = &getToolChain(C.getArgs(), - computeTargetTriple(*this, TargetTriple, - C.getArgs(), ArchName)); + TC = &getToolChain( + C.getArgs(), + computeTargetTriple(*this, TargetTriple, C.getArgs(), ArchName)); else TC = &C.getDefaultToolChain(); @@ -5839,7 +5836,6 @@ InputInfoList Driver::BuildJobsForActionNoCache( TargetDeviceOffloadKind); } - ActionList Inputs = A->getInputs(); const JobAction *JA = cast<JobAction>(A); @@ -5973,10 +5969,11 @@ InputInfoList Driver::BuildJobsForActionNoCache( /*CreatePrefixForHost=*/isa<OffloadPackagerJobAction>(A) || !(A->getOffloadingHostActiveKinds() == Action::OFK_None || AtTopLevel)); - Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch, - AtTopLevel, MultipleArchs, - OffloadingPrefix), - BaseInput); + Result = + InputInfo(A, + GetNamedOutputPath(C, *JA, BaseInput, BoundArch, AtTopLevel, + MultipleArchs, OffloadingPrefix), + BaseInput); if (T->canEmitIR() && OffloadingPrefix.empty()) handleTimeTrace(C, Args, JA, BaseInput, Result); } @@ -6304,12 +6301,10 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, } } else if (JA.getType() == types::TY_PCH && IsCLMode()) { NamedOutput = C.getArgs().MakeArgString(GetClPchPath(C, BaseName)); - } else if ((JA.getType() == types::TY_Plist || JA.getType() == types::TY_AST) && + } else if ((JA.getType() == types::TY_Plist || + JA.getType() == types::TY_AST) && C.getArgs().hasArg(options::OPT__SLASH_o)) { - StringRef Val = - C.getArgs() - .getLastArg(options::OPT__SLASH_o) - ->getValue(); + StringRef Val = C.getArgs().getLastArg(options::OPT__SLASH_o)->getValue(); NamedOutput = MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Object); } else { @@ -6645,15 +6640,15 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, case llvm::Triple::Linux: case llvm::Triple::ELFIAMCU: if (Target.getArch() == llvm::Triple::hexagon) - TC = std::make_unique<toolchains::HexagonToolChain>(*this, Target, - Args); + TC = + std::make_unique<toolchains::HexagonToolChain>(*this, Target, Args); else if ((Target.getVendor() == llvm::Triple::MipsTechnologies) && !Target.hasEnvironment()) TC = std::make_unique<toolchains::MipsLLVMToolChain>(*this, Target, - Args); + Args); else if (Target.isPPC()) TC = std::make_unique<toolchains::PPCLinuxToolChain>(*this, Target, - Args); + Args); else if (Target.getArch() == llvm::Triple::ve) TC = std::make_unique<toolchains::VEToolChain>(*this, Target, Args); else if (Target.isOHOSFamily()) @@ -6698,7 +6693,7 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, break; case llvm::Triple::Itanium: TC = std::make_unique<toolchains::CrossWindowsToolChain>(*this, Target, - Args); + Args); break; case llvm::Triple::MSVC: case llvm::Triple::UnknownEnvironment: @@ -6707,8 +6702,7 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, TC = std::make_unique<toolchains::CrossWindowsToolChain>( *this, Target, Args); else - TC = - std::make_unique<toolchains::MSVCToolChain>(*this, Target, Args); + TC = std::make_unique<toolchains::MSVCToolChain>(*this, Target, Args); break; } break; @@ -6742,8 +6736,8 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, TC = std::make_unique<toolchains::TCELEToolChain>(*this, Target, Args); break; case llvm::Triple::hexagon: - TC = std::make_unique<toolchains::HexagonToolChain>(*this, Target, - Args); + TC = + std::make_unique<toolchains::HexagonToolChain>(*this, Target, Args); break; case llvm::Triple::lanai: TC = std::make_unique<toolchains::LanaiToolChain>(*this, Target, Args); @@ -6759,8 +6753,7 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, TC = std::make_unique<toolchains::AVRToolChain>(*this, Target, Args); break; case llvm::Triple::msp430: - TC = - std::make_unique<toolchains::MSP430ToolChain>(*this, Target, Args); + TC = std::make_unique<toolchains::MSP430ToolChain>(*this, Target, Args); break; case llvm::Triple::riscv32: case llvm::Triple::riscv64: @@ -6943,7 +6936,7 @@ Driver::getOptionVisibilityMask(bool UseDriverMode) const { return llvm::opt::Visibility(options::CLOption); if (IsDXCMode()) return llvm::opt::Visibility(options::DXCOption); - if (IsFlangMode()) { + if (IsFlangMode()) { return llvm::opt::Visibility(options::FlangOption); } return llvm::opt::Visibility(options::ClangOption); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index a0757d71b140c20..2c98fc1d01feda9 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -511,39 +511,6 @@ static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args, } } -/// Vectorize at all optimization levels greater than 1 except for -Oz. -/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is -/// enabled. -static bool shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) { - if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { - if (A->getOption().matches(options::OPT_O4) || - A->getOption().matches(options::OPT_Ofast)) - return true; - - if (A->getOption().matches(options::OPT_O0)) - return false; - - assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); - - // Vectorize -Os. - StringRef S(A->getValue()); - if (S == "s") - return true; - - // Don't vectorize -Oz, unless it's the slp vectorizer. - if (S == "z") - return isSlpVec; - - unsigned OptLevel = 0; - if (S.getAsInteger(10, OptLevel)) - return false; - - return OptLevel > 1; - } - - return false; -} - /// Add -x lang to \p CmdArgs for \p Input. static void addDashXForInput(const ArgList &Args, const InputInfo &Input, ArgStringList &CmdArgs) { diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 699aadec86dcba9..7c3eb8c9732366b 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -973,7 +973,7 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, // LowerMatrixIntrinsicsPass, which is transitively called by // buildThinLTODefaultPipeline under EnableMatrix. if ((IsThinLTO || IsFatLTO || IsUnifiedLTO) && - Args.hasArg(options::OPT_fenable_matrix)) + Args.hasArg(options::OPT_fenable_matrix)) CmdArgs.push_back( Args.MakeArgString(Twine(PluginOptPrefix) + "-enable-matrix")); @@ -1284,7 +1284,7 @@ bool tools::addOpenMPRuntime(const Compilation &C, ArgStringList &CmdArgs, CmdArgs.push_back("-Bdynamic"); if (RTKind == Driver::OMPRT_GOMP && GompNeedsRT) - CmdArgs.push_back("-lrt"); + CmdArgs.push_back("-lrt"); if (IsOffloadingHost) CmdArgs.push_back("-lomptarget"); @@ -1379,10 +1379,12 @@ static void addSanitizerRuntime(const ToolChain &TC, const ArgList &Args, bool IsShared, bool IsWhole) { // Wrap any static runtimes that must be forced into executable in // whole-archive. - if (IsWhole) CmdArgs.push_back("--whole-archive"); + if (IsWhole) + CmdArgs.push_back("--whole-archive"); CmdArgs.push_back(TC.getCompilerRTArgString( Args, Sanitizer, IsShared ? ToolChain::FT_Shared : ToolChain::FT_Static)); - if (IsWhole) CmdArgs.push_back("--no-whole-archive"); + if (IsWhole) + CmdArgs.push_back("--no-whole-archive"); if (IsShared) { addArchSpecificRPath(TC, Args, CmdArgs); @@ -1678,7 +1680,8 @@ bool tools::addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, return !StaticRuntimes.empty() || !NonWholeStaticRuntimes.empty(); } -bool tools::addXRayRuntime(const ToolChain&TC, const ArgList &Args, ArgStringList &CmdArgs) { +bool tools::addXRayRuntime(const ToolChain &TC, const ArgList &Args, + ArgStringList &CmdArgs) { if (Args.hasArg(options::OPT_shared)) { if (TC.getXRayArgs().needsXRayDSORt()) { CmdArgs.push_back("--whole-archive"); @@ -1707,8 +1710,7 @@ void tools::linkXRayRuntimeDeps(const ToolChain &TC, CmdArgs.push_back("-lrt"); CmdArgs.push_back("-lm"); - if (!TC.getTriple().isOSFreeBSD() && - !TC.getTriple().isOSNetBSD() && + if (!TC.getTriple().isOSFreeBSD() && !TC.getTriple().isOSNetBSD() && !TC.getTriple().isOSOpenBSD()) CmdArgs.push_back("-ldl"); } @@ -1983,19 +1985,19 @@ tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) { bool EmbeddedPISupported; switch (Triple.getArch()) { - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: - EmbeddedPISupported = true; - break; - default: - EmbeddedPISupported = false; - break; + case llvm::Triple::arm: + case llvm::Triple::armeb: + case llvm::Triple::thumb: + case llvm::Triple::thumbeb: + EmbeddedPISupported = true; + break; + default: + EmbeddedPISupported = false; + break; } bool ROPI = false, RWPI = false; - Arg* LastROPIArg = Args.getLastArg(options::OPT_fropi, options::OPT_fno_ropi); + Arg *LastROPIArg = Args.getLastArg(options::OPT_fropi, options::OPT_fno_ropi); if (LastROPIArg && LastROPIArg->getOption().matches(options::OPT_fropi)) { if (!EmbeddedPISupported) ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target) @@ -2024,7 +2026,7 @@ tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) { if (ABIName == "n64") PIC = true; // When targettng MIPS with -mno-abicalls, it's always static. - if(Args.hasArg(options::OPT_mno_abicalls)) + if (Args.hasArg(options::OPT_mno_abicalls)) return std::make_tuple(llvm::Reloc::Static, 0U, false); // Unlike other architectures, MIPS, even with -fPIC/-mxgot/multigot, // does not use PIC level 2 for historical reasons. @@ -2190,7 +2192,8 @@ enum class LibGccType { UnspecifiedLibGcc, StaticLibGcc, SharedLibGcc }; static LibGccType getLibGccType(const ToolChain &TC, const Driver &D, const ArgList &Args) { if (Args.hasArg(options::OPT_static_libgcc) || - Args.hasArg(options::OPT_static) || Args.hasArg(options::OPT_static_pie) || + Args.hasArg(options::OPT_static) || + Args.hasArg(options::OPT_static_pie) || // The Android NDK only provides libunwind.a, not libunwind.so. TC.getTriple().isAndroid()) return LibGccType::StaticLibGcc; @@ -2558,11 +2561,10 @@ static void GetSDLFromOffloadArchive( return; StringRef Prefix = isBitCodeSDL ? "libbc-" : "lib"; - std::string OutputLib = - D.GetTemporaryPath(Twine(Prefix + llvm::sys::path::filename(Lib) + "-" + - Arch + "-" + Target) - .str(), - "a"); + std::string OutputLib = D.GetTemporaryPath( + Twine(Prefix + llvm::sys::path::filename(Lib) + "-" + Arch + "-" + Target) + .str(), + "a"); C.addTempFile(C.getArgs().MakeArgString(OutputLib)); @@ -2775,8 +2777,8 @@ void tools::addMachineOutlinerArgs(const Driver &D, } }; - if (Arg *A = Args.getLastArg(options::OPT_moutline, - options::OPT_mno_outline)) { + if (Arg *A = + Args.getLastArg(options::OPT_moutline, options::OPT_mno_outline)) { if (A->getOption().matches(options::OPT_moutline)) { // We only support -moutline in AArch64 and ARM targets right now. If // we're not compiling for these, emit a warning and ignore the flag. @@ -3115,3 +3117,36 @@ void tools::renderCommonIntegerOverflowOptions(const ArgList &Args, CmdArgs.push_back("-fwrapv-pointer"); } } + +/// Vectorize at all optimization levels greater than 1 except for -Oz. +/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is +/// enabled. +bool tools::shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) { + if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { + if (A->getOption().matches(options::OPT_O4) || + A->getOption().matches(options::OPT_Ofast)) + return true; + + if (A->getOption().matches(options::OPT_O0)) + return false; + + assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); + + // Vectorize -Os. + StringRef S(A->getValue()); + if (S == "s") + return true; + + // Don't vectorize -Oz, unless it's the slp vectorizer. + if (S == "z") + return isSlpVec; + + unsigned OptLevel = 0; + if (S.getAsInteger(10, OptLevel)) + return false; + + return OptLevel > 1; + } + + return false; +} diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h index b6ddd99b872798e..783a1f834b33d73 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.h +++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -265,6 +265,8 @@ bool shouldRecordCommandLine(const ToolChain &TC, void renderCommonIntegerOverflowOptions(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs); +bool shouldEnableVectorizerAtOLevel(const llvm::opt::ArgList &Args, + bool isSlpVec); } // end namespace tools } // end namespace driver } // end namespace clang diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index e4019c434968744..1fe2d30da72aaf5 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -149,6 +149,16 @@ void Flang::addCodegenOptions(const ArgList &Args, !stackArrays->getOption().matches(options::OPT_fno_stack_arrays)) CmdArgs.push_back("-fstack-arrays"); + // Enable vectorization per default according to the optimization level + // selected. For optimization levels that want vectorization we use the alias + // option to simplify the hasFlag logic. + bool enableVec = shouldEnableVectorizerAtOLevel(Args, false); + OptSpecifier vectorizeAliasOption = + enableVec ? options::OPT_O_Group : options::OPT_fvectorize; + if (Args.hasFlag(options::OPT_fvectorize, vectorizeAliasOption, + options::OPT_fno_vectorize, enableVec)) + CmdArgs.push_back("-vectorize-loops"); + if (shouldLoopVersion(Args)) CmdArgs.push_back("-fversion-loops-for-stride"); diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def index deb8d1aede518b3..44cb5a2cdd497ad 100644 --- a/flang/include/flang/Frontend/CodeGenOptions.def +++ b/flang/include/flang/Frontend/CodeGenOptions.def @@ -31,6 +31,7 @@ CODEGENOPT(PrepareForFullLTO , 1, 0) ///< Set when -flto is enabled on the CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the ///< compile step. CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass) +CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization. CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning. CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index a2c1d3efef6cf3c..66717d7cf1ad476 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -23,6 +23,7 @@ #include "clang/Basic/AllDiagnostics.h" #include "clang/Basic/DiagnosticDriver.h" #include "clang/Basic/DiagnosticOptions.h" +#include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/OptionUtils.h" #include "clang/Driver/Options.h" @@ -242,6 +243,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, clang::driver::options::OPT_fno_stack_arrays, false)) opts.StackArrays = 1; + if (args.getLastArg(clang::driver::options::OPT_vectorize_loops)) + opts.VectorizeLoop = 1; + if (args.hasFlag(clang::driver::options::OPT_floop_versioning, clang::driver::options::OPT_fno_loop_versioning, false)) opts.LoopVersioning = 1; diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index b0545a7ac2f99a7..af32eba56120b45 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -1030,6 +1030,8 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) { si.getTimePasses().setOutStream(ci.getTimingStreamLLVM()); pto.LoopUnrolling = opts.UnrollLoops; pto.LoopInterleaving = opts.UnrollLoops; + pto.LoopVectorization = opts.VectorizeLoop; + llvm::PassBuilder pb(targetMachine, pto, pgoOpt, &pic); // Attempt to load pass plugins and register their callbacks with PB. diff --git a/flang/test/Driver/optimization-remark.f90 b/flang/test/Driver/optimization-remark.f90 index e90baa892f46a01..90e310d36c807e9 100644 --- a/flang/test/Driver/optimization-remark.f90 +++ b/flang/test/Driver/optimization-remark.f90 @@ -5,33 +5,33 @@ ! DEFINE: %{output} = -emit-llvm -flang-deprecated-no-hlfir -o /dev/null 2>&1 ! Check fc1 can handle -Rpass -! RUN: %flang_fc1 %s -O1 -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS +! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS ! Check that we can override -Rpass= with -Rno-pass. -! RUN: %flang_fc1 %s -O1 -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS ! Check -Rno-pass, -Rno-pass-analysis, -Rno-pass-missed nothing emitted -! RUN: %flang %s -O1 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS -! RUN: %flang %s -O1 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS -! RUN: %flang %s -O1 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang %s -O2 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang %s -O2 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang %s -O2 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS ! Check valid -Rpass regex -! RUN: %flang %s -O1 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY +! RUN: %flang %s -O2 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY ! Check valid -Rpass-missed regex -! RUN: %flang %s -O1 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY +! RUN: %flang %s -O2 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY ! Check valid -Rpass-analysis regex -! RUN: %flang %s -O1 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY +! RUN: %flang %s -O2 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY ! Check full -Rpass message is emitted -! RUN: %flang %s -O1 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS +! RUN: %flang %s -O2 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS ! Check full -Rpass-missed message is emitted -! RUN: %flang %s -O1 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED +! RUN: %flang %s -O2 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED ! Check full -Rpass-analysis message is emitted -! RUN: %flang %s -O1 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS +! RUN: %flang %s -O2 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS ! REMARKS: remark: ! NO-REMARKS-NOT: remark: diff --git a/flang/test/Integration/unroll-loops.f90 b/flang/test/Integration/unroll-loops.f90 index c3fcf1c3a7cf3af..debe45e0ec35957 100644 --- a/flang/test/Integration/unroll-loops.f90 +++ b/flang/test/Integration/unroll-loops.f90 @@ -1,8 +1,8 @@ ! FIXME: https://github.com/llvm/llvm-project/issues/123668 ! ! DEFINE: %{triple} = -! DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL -! DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL +! DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL +! DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL ! ! REDEFINE: %{triple} = aarch64-unknown-linux-gnu ! RUN: %if aarch64-registered-target %{ %{check-unroll} %} diff --git a/flang/test/Lower/HLFIR/unroll-loops.fir b/flang/test/Lower/HLFIR/unroll-loops.fir index 6a9dd28a37b6d92..1321f39677405b9 100644 --- a/flang/test/Lower/HLFIR/unroll-loops.fir +++ b/flang/test/Lower/HLFIR/unroll-loops.fir @@ -1,6 +1,6 @@ // DEFINE: %{triple} = -// DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL -// DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL +// DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL +// DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL // REDEFINE: %{triple} = aarch64-unknown-linux-gnu // RUN: %if aarch64-registered-target %{ %{check-unroll} %} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits