https://github.com/sjoerdmeijer updated https://github.com/llvm/llvm-project/pull/125830
>From 45aa8d52ef8391fd15d81fb55a39c34f5aec233b Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer <smei...@nvidia.com> Date: Thu, 6 Feb 2025 03:00:24 -0800 Subject: [PATCH] [Clang][Driver] Add an option to control loop-interchange This introduces options -floop-interchange and -fno-loop-interchange to enable/disable the loop-interchange pass. This is part of the work that tries to get that pass enabled by default (#124911), where it was remarked that a user facing option to control this would be convenient to have. The option (name) is the same as GCC's. --- clang/include/clang/Basic/CodeGenOptions.def | 1 + clang/include/clang/Driver/Options.td | 4 ++++ clang/lib/CodeGen/BackendUtil.cpp | 2 ++ clang/lib/Driver/ToolChains/Clang.cpp | 2 ++ clang/lib/Frontend/CompilerInvocation.cpp | 7 +++++++ clang/test/Driver/clang_f_opts.c | 7 +++++++ llvm/include/llvm/Passes/PassBuilder.h | 4 ++++ llvm/lib/Passes/PassBuilderPipelines.cpp | 11 ++++++----- 8 files changed, 33 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 1ab8c7fb4d3c33c..22e6bf4aae6db50 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -319,6 +319,7 @@ CODEGENOPT(TimePassesPerRun , 1, 0) ///< Set when -ftime-report=per-pass-run is CODEGENOPT(TimeTrace , 1, 0) ///< Set when -ftime-trace is enabled. VALUE_CODEGENOPT(TimeTraceGranularity, 32, 500) ///< Minimum time granularity (in microseconds), ///< traced by time profiler +CODEGENOPT(InterchangeLoops , 1, 0) ///< Run loop-interchange. CODEGENOPT(UnrollLoops , 1, 0) ///< Control whether loops are unrolled. CODEGENOPT(RerollLoops , 1, 0) ///< Control whether loops are rerolled. CODEGENOPT(NoUseJumpTables , 1, 0) ///< Set when -fno-jump-tables is enabled. diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index df705104d9ea314..f49bba96bcd90c5 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4161,6 +4161,10 @@ def ftrap_function_EQ : Joined<["-"], "ftrap-function=">, Group<f_Group>, Visibility<[ClangOption, CC1Option]>, HelpText<"Issue call to specified function rather than a trap instruction">, MarshallingInfoString<CodeGenOpts<"TrapFuncName">>; +def floop_interchange : Flag<["-"], "floop-interchange">, Group<f_Group>, + HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, CC1Option]>; +def fno_loop_interchange: Flag<["-"], "fno-loop-interchange">, Group<f_Group>, + HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, CC1Option]>; def funroll_loops : Flag<["-"], "funroll-loops">, Group<f_Group>, HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 3e65eeb3755d2ff..0f20fba09e44409 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -881,6 +881,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( PipelineTuningOptions PTO; PTO.LoopUnrolling = CodeGenOpts.UnrollLoops; + PTO.LoopInterchange = CodeGenOpts.InterchangeLoops; // For historical reasons, loop interleaving is set to mirror setting for loop // unrolling. PTO.LoopInterleaving = CodeGenOpts.UnrollLoops; @@ -1305,6 +1306,7 @@ runThinLTOBackend(CompilerInstance &CI, ModuleSummaryIndex *CombinedIndex, initTargetOptions(CI, Diags, Conf.Options); Conf.SampleProfile = std::move(SampleProfile); Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops; + Conf.PTO.LoopInterchange = CGOpts.InterchangeLoops; // For historical reasons, loop interleaving is set to mirror setting for loop // unrolling. Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 33f08cf28feca18..bb7ddb93e338c04 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7041,6 +7041,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fwritable_strings); Args.AddLastArg(CmdArgs, options::OPT_funroll_loops, options::OPT_fno_unroll_loops); + Args.AddLastArg(CmdArgs, options::OPT_floop_interchange, + options::OPT_fno_loop_interchange); Args.AddLastArg(CmdArgs, options::OPT_fstrict_flex_arrays_EQ); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 3bf124e4827be96..cb030473908fa6c 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1665,6 +1665,11 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts, else if (!Opts.UnrollLoops && Opts.OptimizationLevel > 1) GenerateArg(Consumer, OPT_fno_unroll_loops); + if (Opts.InterchangeLoops) + GenerateArg(Consumer, OPT_floop_interchange); + else + GenerateArg(Consumer, OPT_fno_loop_interchange); + if (!Opts.BinutilsVersion.empty()) GenerateArg(Consumer, OPT_fbinutils_version_EQ, Opts.BinutilsVersion); @@ -1968,6 +1973,8 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, Opts.UnrollLoops = Args.hasFlag(OPT_funroll_loops, OPT_fno_unroll_loops, (Opts.OptimizationLevel > 1)); + Opts.InterchangeLoops = + Args.hasFlag(OPT_floop_interchange, OPT_fno_loop_interchange, false); Opts.BinutilsVersion = std::string(Args.getLastArgValue(OPT_fbinutils_version_EQ)); diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index 38f25898c955682..7454ce3d30f5fc3 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -45,6 +45,13 @@ // CHECK-UNROLL-LOOPS: "-funroll-loops" // CHECK-NO-UNROLL-LOOPS: "-fno-unroll-loops" +// RUN: %clang -### -S -floop-interchange %s 2>&1 | FileCheck -check-prefix=CHECK-INTERCHANGE-LOOPS %s +// RUN: %clang -### -S -fno-loop-interchange %s 2>&1 | FileCheck -check-prefix=CHECK-NO-INTERCHANGE-LOOPS %s +// RUN: %clang -### -S -fno-loop-interchange -floop-interchange %s 2>&1 | FileCheck -check-prefix=CHECK-INTERCHANGE-LOOPS %s +// RUN: %clang -### -S -floop-interchange -fno-loop-interchange %s 2>&1 | FileCheck -check-prefix=CHECK-NO-INTERCHANGE-LOOPS %s +// CHECK-INTERCHANGE-LOOPS: "-floop-interchange" +// CHECK-NO-INTERCHANGE-LOOPS: "-fno-loop-interchange" + // RUN: %clang -### -S -fprofile-sample-accurate %s 2>&1 | FileCheck -check-prefix=CHECK-PROFILE-SAMPLE-ACCURATE %s // CHECK-PROFILE-SAMPLE-ACCURATE: "-fprofile-sample-accurate" diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index e7bc3a58f414f15..361b9b678f9be7f 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -60,6 +60,10 @@ class PipelineTuningOptions { /// Tuning option to enable/disable loop unrolling. Its default value is true. bool LoopUnrolling; + /// Tuning option to enable/disable loop interchange. Its default value is + /// false. + bool LoopInterchange; + /// Tuning option to forget all SCEV loops in LoopUnroll. Its default value /// is that of the flag: `-forget-scev-loop-unroll`. bool ForgetAllSCEVInLoopUnroll; diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 4ec0fb8fc81ea4c..fd59535a1570535 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -200,9 +200,9 @@ static cl::opt<bool> ExtraVectorizerPasses( static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass")); -static cl::opt<bool> EnableLoopInterchange( - "enable-loopinterchange", cl::init(false), cl::Hidden, - cl::desc("Enable the experimental LoopInterchange Pass")); +static cl::opt<bool> + EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, + cl::desc("Enable the LoopInterchange Pass")); static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, @@ -316,6 +316,7 @@ PipelineTuningOptions::PipelineTuningOptions() { LoopVectorization = true; SLPVectorization = false; LoopUnrolling = true; + LoopInterchange = EnableLoopInterchange; ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; LicmMssaOptCap = SetLicmMssaOptCap; LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; @@ -480,7 +481,7 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, LPM2.addPass(LoopDeletionPass()); - if (EnableLoopInterchange) + if (PTO.LoopInterchange) LPM2.addPass(LoopInterchangePass()); // Do not enable unrolling in PreLinkThinLTO phase during sample PGO @@ -671,7 +672,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, LPM2.addPass(LoopDeletionPass()); - if (EnableLoopInterchange) + if (PTO.LoopInterchange) LPM2.addPass(LoopInterchangePass()); // Do not enable unrolling in PreLinkThinLTO phase during sample PGO _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits