aganea created this revision.
aganea added reviewers: tejohnson, thakis, rnk, RobRich999.
Herald added subscribers: cfe-commits, dang, dexonsmith, mikhail.ramalho, 
steven_wu, MaskRay, aheejin, hiraditya, arichardson, inglorion, sbc100, emaste.
Herald added a reviewer: espindola.
Herald added projects: clang, LLVM.

Before this patch, it wasn't possible to extend the ThinLTO thread count to all 
SMT/CMT/hyper-threads in the system. Only one thread per physical core was 
allowed, as dictated by the use of `llvm::heavyweight_hardware_concurrency()` in 
the places where the ThinLTO objects are initialized.

Any number passed to the LLD flag `/opt:lldltojobs=...`, or to any other 
ThinLTO-specific flag, was interpreted in the context of 
`llvm::heavyweight_hardware_concurrency()`, which means SMT is disabled.

After this patch, one can pass `/opt:lldltojobs=all` to LLD to ignore the 
`llvm::heavyweight_hardware_concurrency()` restriction and instead use all 
SMT/CMT/hyper-threads in the system, which is equivalent to using 
`llvm::hardware_concurrency()`.

All command-line flags and code paths that lead to ThinLTO initialization are 
updated by this patch: `-flto-jobs=...`, `-threads=...`, `--thinlto-jobs=...`, 
`-thinlto-threads=...`, `--plugin-opt=jobs=...`.
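
For example (illustrative invocations mirroring the RUN lines in the updated 
tests; the inputs are placeholders):

  clang -flto=thin -flto-jobs=all <inputs...>
  ld.lld --thinlto-jobs=all <inputs...>
  lld-link /opt:lldltojobs=all <inputs...>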

Summary
-------

To sum up:
`/opt:lldltojobs=0` -- use all threads available, within the limits of the 
strategy in effect (`heavyweight_hardware_concurrency()` or `hardware_concurrency()`).
`/opt:lldltojobs=N` -- limit usage to N threads, within the limits of the 
strategy in effect (`heavyweight_hardware_concurrency()` or `hardware_concurrency()`).
`/opt:lldltojobs=all` -- use all threads available, regardless of the strategy 
that would otherwise apply, i.e. always `hardware_concurrency()`.
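
The mapping from the textual job count to a `ThreadPoolStrategy` boils down to 
the following sketch (it follows the `getStrategy()` lambda added to 
llvm-lto2.cpp in this patch; the LLD drivers inline the equivalent ternary):

  ThreadPoolStrategy getStrategy(StringRef Num) {
    if (Num == "all")
      return llvm::hardware_concurrency();             // all SMT/CMT/hyper-threads
    unsigned N = 0;
    Num.getAsInteger(10, N);                           // "0" or empty -> autodetect
    return llvm::heavyweight_hardware_concurrency(N);  // one thread per core
  }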


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D75153

Files:
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/lib/Driver/ToolChains/CommonArgs.h
  clang/lib/Driver/ToolChains/Darwin.cpp
  lld/COFF/Config.h
  lld/COFF/Driver.cpp
  lld/COFF/LTO.cpp
  lld/ELF/Config.h
  lld/ELF/Driver.cpp
  lld/ELF/LTO.cpp
  lld/test/COFF/thinlto.ll
  lld/test/ELF/basic.s
  lld/test/ELF/lto/thinlto.ll
  lld/test/wasm/lto/thinlto.ll
  lld/wasm/Config.h
  lld/wasm/Driver.cpp
  lld/wasm/LTO.cpp
  llvm/include/llvm/LTO/LTO.h
  llvm/lib/LTO/LTO.cpp
  llvm/test/Transforms/PGOProfile/thinlto_samplepgo_icp3.ll
  llvm/tools/gold/gold-plugin.cpp
  llvm/tools/llvm-lto2/llvm-lto2.cpp

Index: llvm/tools/llvm-lto2/llvm-lto2.cpp
===================================================================
--- llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -66,9 +66,10 @@
                                        "distributed backend case"));
 
 // Default to using all available threads in the system, but using only one
-// thread per core, as indicated by the usage of
-// heavyweight_hardware_concurrency() in the InProcessThinBackend constructor.
-static cl::opt<int> Threads("thinlto-threads", cl::init(0));
+// thread per core (no SMT).
+// Use -thinlto-threads=all to use hardware_concurrency() instead, which means
+// to use all hardware threads or cores in the system.
+static cl::opt<std::string> Threads("thinlto-threads");
 
 static cl::list<std::string> SymbolResolutions(
     "r",
@@ -276,6 +277,16 @@
   Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
   Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
 
+  auto getStrategy = [](StringRef Num) {
+    if (Num == "all")
+      return llvm::hardware_concurrency();
+    if (Num.empty())
+      return ThreadPoolStrategy();
+    unsigned V;
+    Num.getAsInteger(10, V);
+    return llvm::heavyweight_hardware_concurrency(V);
+  };
+
   ThinBackend Backend;
   if (ThinLTODistributedIndexes)
     Backend = createWriteIndexesThinBackend(/* OldPrefix */ "",
@@ -284,7 +295,7 @@
                                             /* LinkedObjectsFile */ nullptr,
                                             /* OnWrite */ {});
   else
-    Backend = createInProcessThinBackend(Threads);
+    Backend = createInProcessThinBackend(getStrategy(Threads));
   LTO Lto(std::move(Conf), std::move(Backend));
 
   bool HasErrors = false;
Index: llvm/tools/gold/gold-plugin.cpp
===================================================================
--- llvm/tools/gold/gold-plugin.cpp
+++ llvm/tools/gold/gold-plugin.cpp
@@ -139,6 +139,7 @@
   static unsigned Parallelism = 0;
   // Default regular LTO codegen parallelism (number of partitions).
   static unsigned ParallelCodeGenParallelismLevel = 1;
+  static bool ParallelismHeavyWeight = true;
 #ifdef NDEBUG
   static bool DisableVerify = true;
 #else
@@ -270,7 +271,10 @@
         message(LDPL_FATAL, "Optimization level must be between 0 and 3");
       OptLevel = opt[1] - '0';
     } else if (opt.startswith("jobs=")) {
-      if (StringRef(opt_ + 5).getAsInteger(10, Parallelism))
+      StringRef Num(opt_ + 5);
+      if (Num == "all")
+        ParallelismHeavyWeight = false;
+      else if (Num.getAsInteger(10, Parallelism))
         message(LDPL_FATAL, "Invalid parallelism level: %s", opt_ + 5);
     } else if (opt.startswith("lto-partitions=")) {
       if (opt.substr(strlen("lto-partitions="))
@@ -875,14 +879,18 @@
   Conf.PTO.LoopVectorization = options::OptLevel > 1;
   Conf.PTO.SLPVectorization = options::OptLevel > 1;
 
-  if (options::Parallelism)
-    Backend = createInProcessThinBackend(options::Parallelism);
   if (options::thinlto_index_only) {
     std::string OldPrefix, NewPrefix;
     getThinLTOOldAndNewPrefix(OldPrefix, NewPrefix);
     Backend = createWriteIndexesThinBackend(OldPrefix, NewPrefix,
                                             options::thinlto_emit_imports_files,
                                             LinkedObjectsFile, OnIndexWrite);
+  } else {
+    ThreadPoolStrategy S =
+        options::ParallelismHeavyWeight
+            ? llvm::heavyweight_hardware_concurrency(options::Parallelism)
+            : llvm::hardware_concurrency();
+    Backend = createInProcessThinBackend(S);
   }
 
   Conf.OverrideTriple = options::triple;
Index: llvm/test/Transforms/PGOProfile/thinlto_samplepgo_icp3.ll
===================================================================
--- llvm/test/Transforms/PGOProfile/thinlto_samplepgo_icp3.ll
+++ llvm/test/Transforms/PGOProfile/thinlto_samplepgo_icp3.ll
@@ -7,6 +7,10 @@
 ; Test to make sure importing and dead stripping works in the
 ; case where the target is a local function that also indirectly calls itself.
 ; RUN: llvm-lto2 run -thinlto-threads=1 -save-temps -o %t3 %t.bc %t2.bc -r %t.bc,fptr,plx -r %t.bc,main,plx -r %t2.bc,_Z6updatei,pl -r %t2.bc,fptr,l -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS
+
+; Also test with all threads on
+; RUN: llvm-lto2 run -thinlto-threads=all -save-temps -o %t3 %t.bc %t2.bc -r %t.bc,fptr,plx -r %t.bc,main,plx -r %t2.bc,_Z6updatei,pl -r %t2.bc,fptr,l -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS
+
 ; Make sure we import the promted indirectly called target
 ; IMPORTS: Import _ZL3foov.llvm.0
 
Index: llvm/lib/LTO/LTO.cpp
===================================================================
--- llvm/lib/LTO/LTO.cpp
+++ llvm/lib/LTO/LTO.cpp
@@ -477,7 +477,8 @@
 LTO::ThinLTOState::ThinLTOState(ThinBackend Backend)
     : Backend(Backend), CombinedIndex(/*HaveGVs*/ false) {
   if (!Backend)
-    this->Backend = createInProcessThinBackend();
+    this->Backend =
+        createInProcessThinBackend(llvm::heavyweight_hardware_concurrency());
 }
 
 LTO::LTO(Config Conf, ThinBackend Backend,
@@ -1090,13 +1091,12 @@
 public:
   InProcessThinBackend(
       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
-      unsigned ThinLTOParallelismLevel,
+      ThreadPoolStrategy ThinLTOParallelism,
       const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
       AddStreamFn AddStream, NativeObjectCache Cache)
       : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries),
-        BackendThreadPool(
-            heavyweight_hardware_concurrency(ThinLTOParallelismLevel)),
-        AddStream(std::move(AddStream)), Cache(std::move(Cache)) {
+        BackendThreadPool(ThinLTOParallelism), AddStream(std::move(AddStream)),
+        Cache(std::move(Cache)) {
     for (auto &Name : CombinedIndex.cfiFunctionDefs())
       CfiFunctionDefs.insert(
           GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
@@ -1192,13 +1192,13 @@
 };
 } // end anonymous namespace
 
-ThinBackend lto::createInProcessThinBackend(unsigned ParallelismLevel) {
+ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism) {
   return [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
              const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
              AddStreamFn AddStream, NativeObjectCache Cache) {
     return std::make_unique<InProcessThinBackend>(
-        Conf, CombinedIndex, ParallelismLevel, ModuleToDefinedGVSummaries,
-        AddStream, Cache);
+        Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, AddStream,
+        Cache);
   };
 }
 
Index: llvm/include/llvm/LTO/LTO.h
===================================================================
--- llvm/include/llvm/LTO/LTO.h
+++ llvm/include/llvm/LTO/LTO.h
@@ -228,7 +228,7 @@
 
 /// This ThinBackend runs the individual backend jobs in-process.
 /// The default value means to use one job per hardware core (not hyper-thread).
-ThinBackend createInProcessThinBackend(unsigned ParallelismLevel = 0);
+ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism);
 
 /// This ThinBackend writes individual module indexes to files, instead of
 /// running the individual backend jobs. This backend is for distributed builds
Index: lld/wasm/LTO.cpp
===================================================================
--- lld/wasm/LTO.cpp
+++ lld/wasm/LTO.cpp
@@ -63,10 +63,11 @@
   if (config->saveTemps)
     checkError(c.addSaveTemps(config->outputFile.str() + ".",
                               /*UseInputModulePath*/ true));
-
-  lto::ThinBackend backend;
-  if (config->thinLTOJobs != -1U)
-    backend = lto::createInProcessThinBackend(config->thinLTOJobs);
+  ThreadPoolStrategy S =
+      config->thinLTOJobsHeavyWeightThreads
+          ? llvm::heavyweight_hardware_concurrency(config->thinLTOJobs)
+          : llvm::hardware_concurrency();
+  lto::ThinBackend backend = lto::createInProcessThinBackend(S);
   return std::make_unique<lto::LTO>(std::move(c), backend,
                                      config->ltoPartitions);
 }
Index: lld/wasm/Driver.cpp
===================================================================
--- lld/wasm/Driver.cpp
+++ lld/wasm/Driver.cpp
@@ -342,7 +342,15 @@
   config->thinLTOCachePolicy = CHECK(
       parseCachePruningPolicy(args.getLastArgValue(OPT_thinlto_cache_policy)),
       "--thinlto-cache-policy: invalid cache policy");
-  config->thinLTOJobs = args::getInteger(args, OPT_thinlto_jobs, -1u);
+
+  if (auto *arg = args.getLastArgNoClaim(OPT_thinlto_jobs)) {
+    StringRef s = arg->getValue();
+    if (s == "all") {
+      config->thinLTOJobsHeavyWeightThreads = false;
+      arg->claim();
+    } else
+      config->thinLTOJobs = args::getInteger(args, OPT_thinlto_jobs, 0);
+  }
   errorHandler().verbose = args.hasArg(OPT_verbose);
   LLVM_DEBUG(errorHandler().verbose = true);
   threadsEnabled = args.hasFlag(OPT_threads, OPT_no_threads, true);
@@ -395,8 +403,6 @@
     error("invalid optimization level for LTO: " + Twine(config->ltoo));
   if (config->ltoPartitions == 0)
     error("--lto-partitions: number of threads must be > 0");
-  if (config->thinLTOJobs == 0)
-    error("--thinlto-jobs: number of threads must be > 0");
 
   if (config->pie && config->shared)
     error("-shared and -pie may not be used together");
Index: lld/wasm/Config.h
===================================================================
--- lld/wasm/Config.h
+++ lld/wasm/Config.h
@@ -46,6 +46,7 @@
   bool stripDebug;
   bool stackFirst;
   bool trace;
+  bool thinLTOJobsHeavyWeightThreads = true;
   uint32_t globalBase;
   uint32_t initialMemory;
   uint32_t maxMemory;
@@ -53,7 +54,7 @@
   unsigned ltoPartitions;
   unsigned ltoo;
   unsigned optimize;
-  unsigned thinLTOJobs;
+  unsigned thinLTOJobs = 0;
 
   llvm::StringRef entry;
   llvm::StringRef outputFile;
Index: lld/test/wasm/lto/thinlto.ll
===================================================================
--- lld/test/wasm/lto/thinlto.ll
+++ lld/test/wasm/lto/thinlto.ll
@@ -14,7 +14,13 @@
 ; RUN: llvm-nm %t31.lto.o | FileCheck %s --check-prefix=NM1
 ; RUN: llvm-nm %t32.lto.o | FileCheck %s --check-prefix=NM2
 
-; Check without --thinlto-jobs (which currently default to hardware_concurrency)
+; Test with all threads, on all cores, on all CPU sockets
+; RUN: rm -f %t31.lto.o %t32.lto.o
+; RUN: wasm-ld -r -save-temps --thinlto-jobs=all %t1.o %t2.o -o %t3
+; RUN: llvm-nm %t31.lto.o | FileCheck %s --check-prefix=NM1
+; RUN: llvm-nm %t32.lto.o | FileCheck %s --check-prefix=NM2
+
+; Check without --thinlto-jobs (which currently defaults to heavyweight_hardware_concurrency, meaning one thread per hardware core -- not SMT)
 ; RUN: wasm-ld -r %t1.o %t2.o -o %t3
 ; RUN: llvm-nm %t31.lto.o | FileCheck %s --check-prefix=NM1
 ; RUN: llvm-nm %t32.lto.o | FileCheck %s --check-prefix=NM2
Index: lld/test/ELF/lto/thinlto.ll
===================================================================
--- lld/test/ELF/lto/thinlto.ll
+++ lld/test/ELF/lto/thinlto.ll
@@ -16,7 +16,13 @@
 ; RUN: llvm-nm %t31.lto.o | FileCheck %s --check-prefix=NM1
 ; RUN: llvm-nm %t32.lto.o | FileCheck %s --check-prefix=NM2
 
-; Then check without --thinlto-jobs (which currently default to hardware_concurrency)
+; Test with all threads, on all cores, on all CPU sockets
+; RUN: rm -f %t31.lto.o %t32.lto.o
+; RUN: ld.lld -save-temps --thinlto-jobs=all -shared %t1.o %t2.o -o %t3
+; RUN: llvm-nm %t31.lto.o | FileCheck %s --check-prefix=NM1
+; RUN: llvm-nm %t32.lto.o | FileCheck %s --check-prefix=NM2
+
+; Then check without --thinlto-jobs (which currently defaults to heavyweight_hardware_concurrency, meaning one thread per hardware core -- not SMT)
 ; RUN: ld.lld -shared %t1.o %t2.o -o %t3
 ; RUN: llvm-nm %t31.lto.o | FileCheck %s --check-prefix=NM1
 ; RUN: llvm-nm %t32.lto.o | FileCheck %s --check-prefix=NM2
Index: lld/test/ELF/basic.s
===================================================================
--- lld/test/ELF/basic.s
+++ lld/test/ELF/basic.s
@@ -249,9 +249,19 @@
 # RUN: not ld.lld %t --plugin-opt=lto-partitions=0 2>&1 | FileCheck --check-prefix=NOTHREADS %s
 # NOTHREADS: --lto-partitions: number of threads must be > 0
 
-# RUN: not ld.lld %t --thinlto-jobs=0 2>&1 | FileCheck --check-prefix=NOTHREADSTHIN %s
-# RUN: not ld.lld %t --plugin-opt=jobs=0 2>&1 | FileCheck --check-prefix=NOTHREADSTHIN %s
-# NOTHREADSTHIN: --thinlto-jobs: number of threads must be > 0
+# RUN: ld.lld %t --thinlto-jobs=0 -verbose 2>&1 | FileCheck --check-prefix=THREADSTHIN %s
+# RUN: ld.lld %t --thinlto-jobs=1 -verbose 2>&1 | FileCheck --check-prefix=THREADSTHIN %s
+# RUN: ld.lld %t --thinlto-jobs=2 -verbose 2>&1 | FileCheck --check-prefix=THREADSTHIN %s
+# RUN: ld.lld %t --thinlto-jobs=all -verbose 2>&1 | FileCheck --check-prefix=THREADSTHIN %s
+# THREADSTHIN: basic.s.tmp
+# RUN: not ld.lld %t --thinlto-jobs=1- -verbose 2>&1 | FileCheck --check-prefix=BADTHREADSTHIN %s
+# BADTHREADSTHIN: error: --{{.*}}jobs=1-: number expected, but got '1-'
+
+# RUN: ld.lld %t --plugin-opt=jobs=0 -verbose 2>&1 | FileCheck --check-prefix=THREADSTHIN %s
+# RUN: ld.lld %t --plugin-opt=jobs=1 -verbose 2>&1 | FileCheck --check-prefix=THREADSTHIN %s
+# RUN: ld.lld %t --plugin-opt=jobs=2 -verbose 2>&1 | FileCheck --check-prefix=THREADSTHIN %s
+# RUN: ld.lld %t --plugin-opt=jobs=all -verbose 2>&1 | FileCheck --check-prefix=THREADSTHIN %s
+# RUN: not ld.lld %t --plugin-opt=jobs=1- -verbose 2>&1 | FileCheck --check-prefix=BADTHREADSTHIN %s
 
 # RUN: not ld.lld %t -z ifunc-noplt -z text 2>&1 | FileCheck --check-prefix=NOIFUNCPLTNOTEXTREL %s
 # NOIFUNCPLTNOTEXTREL: -z text and -z ifunc-noplt may not be used together
Index: lld/test/COFF/thinlto.ll
===================================================================
--- lld/test/COFF/thinlto.ll
+++ lld/test/COFF/thinlto.ll
@@ -6,6 +6,11 @@
 ; RUN: lld-link /lldsavetemps /out:%T/thinlto/main.exe /entry:main /subsystem:console %T/thinlto/main.obj %T/thinlto/foo.obj
 ; RUN: llvm-nm %T/thinlto/main.exe1.lto.obj | FileCheck %s
 
+; RUN: lld-link /lldsavetemps /out:%T/thinlto/main.exe /entry:main /subsystem:console %T/thinlto/main.obj %T/thinlto/foo.obj /opt:lldltojobs=1
+; RUN: llvm-nm %T/thinlto/main.exe1.lto.obj | FileCheck %s
+; RUN: lld-link /lldsavetemps /out:%T/thinlto/main.exe /entry:main /subsystem:console %T/thinlto/main.obj %T/thinlto/foo.obj /opt:lldltojobs=all
+; RUN: llvm-nm %T/thinlto/main.exe1.lto.obj | FileCheck %s
+
 ; CHECK-NOT: U foo
 
 target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
Index: lld/ELF/LTO.cpp
===================================================================
--- lld/ELF/LTO.cpp
+++ lld/ELF/LTO.cpp
@@ -146,8 +146,12 @@
         std::string(config->thinLTOPrefixReplace.first),
         std::string(config->thinLTOPrefixReplace.second),
         config->thinLTOEmitImportsFiles, indexFile.get(), onIndexWrite);
-  } else if (config->thinLTOJobs != -1U) {
-    backend = lto::createInProcessThinBackend(config->thinLTOJobs);
+  } else {
+    ThreadPoolStrategy S =
+        config->thinLTOJobsHeavyWeightThreads
+            ? llvm::heavyweight_hardware_concurrency(config->thinLTOJobs)
+            : llvm::hardware_concurrency();
+    backend = lto::createInProcessThinBackend(S);
   }
 
   ltoObj = std::make_unique<lto::LTO>(createConfig(), backend,
Index: lld/ELF/Driver.cpp
===================================================================
--- lld/ELF/Driver.cpp
+++ lld/ELF/Driver.cpp
@@ -977,7 +977,6 @@
   config->thinLTOIndexOnly = args.hasArg(OPT_thinlto_index_only) ||
                              args.hasArg(OPT_thinlto_index_only_eq);
   config->thinLTOIndexOnlyArg = args.getLastArgValue(OPT_thinlto_index_only_eq);
-  config->thinLTOJobs = args::getInteger(args, OPT_thinlto_jobs, -1u);
   config->thinLTOObjectSuffixReplace =
       getOldNewOptions(args, OPT_thinlto_object_suffix_replace_eq);
   config->thinLTOPrefixReplace =
@@ -1034,6 +1033,15 @@
   for (auto *arg : args.filtered(OPT_plugin_opt))
     parseClangOption(arg->getValue(), arg->getSpelling());
 
+  if (auto *arg = args.getLastArgNoClaim(OPT_thinlto_jobs)) {
+    StringRef s = arg->getValue();
+    if (s == "all") {
+      config->thinLTOJobsHeavyWeightThreads = false;
+      arg->claim();
+    } else
+      config->thinLTOJobs = args::getInteger(args, OPT_thinlto_jobs, 0);
+  }
+
   // Parse -mllvm options.
   for (auto *arg : args.filtered(OPT_mllvm))
     parseClangOption(arg->getValue(), arg->getSpelling());
@@ -1042,8 +1050,6 @@
     error("invalid optimization level for LTO: " + Twine(config->ltoo));
   if (config->ltoPartitions == 0)
     error("--lto-partitions: number of threads must be > 0");
-  if (config->thinLTOJobs == 0)
-    error("--thinlto-jobs: number of threads must be > 0");
 
   if (config->splitStackAdjustSize < 0)
     error("--split-stack-adjust-size: size must be >= 0");
Index: lld/ELF/Config.h
===================================================================
--- lld/ELF/Config.h
+++ lld/ELF/Config.h
@@ -191,6 +191,7 @@
   bool trace;
   bool thinLTOEmitImportsFiles;
   bool thinLTOIndexOnly;
+  bool thinLTOJobsHeavyWeightThreads = true;
   bool timeTraceEnabled;
   bool tocOptimize;
   bool undefinedVersion;
@@ -244,7 +245,7 @@
   unsigned ltoPartitions;
   unsigned ltoo;
   unsigned optimize;
-  unsigned thinLTOJobs;
+  unsigned thinLTOJobs = 0;
   unsigned timeTraceGranularity;
   int32_t splitStackAdjustSize;
 
Index: lld/COFF/LTO.cpp
===================================================================
--- lld/COFF/LTO.cpp
+++ lld/COFF/LTO.cpp
@@ -102,8 +102,12 @@
         std::string(config->thinLTOPrefixReplace.first),
         std::string(config->thinLTOPrefixReplace.second),
         config->thinLTOEmitImportsFiles, indexFile.get(), OnIndexWrite);
-  } else if (config->thinLTOJobs != 0) {
-    backend = lto::createInProcessThinBackend(config->thinLTOJobs);
+  } else {
+    ThreadPoolStrategy S =
+        config->thinLTOJobsHeavyWeightThreads
+            ? llvm::heavyweight_hardware_concurrency(config->thinLTOJobs)
+            : llvm::hardware_concurrency();
+    backend = lto::createInProcessThinBackend(S);
   }
 
   ltoObj = std::make_unique<lto::LTO>(createConfig(), backend,
Index: lld/COFF/Driver.cpp
===================================================================
--- lld/COFF/Driver.cpp
+++ lld/COFF/Driver.cpp
@@ -1416,8 +1416,10 @@
           error("/opt:lldlto: invalid optimization level: " + optLevel);
       } else if (s.startswith("lldltojobs=")) {
         StringRef jobs = s.substr(11);
-        if (jobs.getAsInteger(10, config->thinLTOJobs) ||
-            config->thinLTOJobs == 0)
+        if (jobs == "all")
+          config->thinLTOJobsHeavyWeightThreads = false;
+        else if (jobs.getAsInteger(10, config->thinLTOJobs) ||
+                 config->thinLTOJobs == 0)
           error("/opt:lldltojobs: invalid job count: " + jobs);
       } else if (s.startswith("lldltopartitions=")) {
         StringRef n = s.substr(17);
Index: lld/COFF/Config.h
===================================================================
--- lld/COFF/Config.h
+++ lld/COFF/Config.h
@@ -230,6 +230,8 @@
   bool swaprunNet = false;
   bool thinLTOEmitImportsFiles;
   bool thinLTOIndexOnly;
+  // Used for /opt:lldltojobs=all
+  bool thinLTOJobsHeavyWeightThreads = true;
 };
 
 extern Configuration *config;
Index: clang/lib/Driver/ToolChains/Darwin.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Darwin.cpp
+++ clang/lib/Driver/ToolChains/Darwin.cpp
@@ -605,8 +605,8 @@
 
   getMachOToolChain().addProfileRTLibs(Args, CmdArgs);
 
-  if (unsigned Parallelism =
-          getLTOParallelism(Args, getToolChain().getDriver())) {
+  StringRef Parallelism = getLTOParallelism(Args, getToolChain().getDriver());
+  if (!Parallelism.empty()) {
     CmdArgs.push_back("-mllvm");
     CmdArgs.push_back(Args.MakeArgString("-threads=" + Twine(Parallelism)));
   }
Index: clang/lib/Driver/ToolChains/CommonArgs.h
===================================================================
--- clang/lib/Driver/ToolChains/CommonArgs.h
+++ clang/lib/Driver/ToolChains/CommonArgs.h
@@ -88,7 +88,8 @@
 
 bool isObjCAutoRefCount(const llvm::opt::ArgList &Args);
 
-unsigned getLTOParallelism(const llvm::opt::ArgList &Args, const Driver &D);
+llvm::StringRef getLTOParallelism(const llvm::opt::ArgList &Args,
+                                  const Driver &D);
 
 bool areOptimizationsEnabled(const llvm::opt::ArgList &Args);
 
Index: clang/lib/Driver/ToolChains/CommonArgs.cpp
===================================================================
--- clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -338,14 +338,17 @@
   }
 }
 
-unsigned tools::getLTOParallelism(const ArgList &Args, const Driver &D) {
+llvm::StringRef tools::getLTOParallelism(const ArgList &Args, const Driver &D) {
   unsigned Parallelism = 0;
   Arg *LtoJobsArg = Args.getLastArg(options::OPT_flto_jobs_EQ);
-  if (LtoJobsArg &&
-      StringRef(LtoJobsArg->getValue()).getAsInteger(10, Parallelism))
-    D.Diag(diag::err_drv_invalid_int_value) << LtoJobsArg->getAsString(Args)
-                                            << LtoJobsArg->getValue();
-  return Parallelism;
+  if (!LtoJobsArg)
+    return {};
+  if (StringRef(LtoJobsArg->getValue()) == "all")
+    return LtoJobsArg->getValue();
+  if (StringRef(LtoJobsArg->getValue()).getAsInteger(10, Parallelism))
+    D.Diag(diag::err_drv_invalid_int_value)
+        << LtoJobsArg->getAsString(Args) << LtoJobsArg->getValue();
+  return LtoJobsArg->getValue();
 }
 
 // CloudABI uses -ffunction-sections and -fdata-sections by default.
@@ -410,7 +413,8 @@
   if (IsThinLTO)
     CmdArgs.push_back("-plugin-opt=thinlto");
 
-  if (unsigned Parallelism = getLTOParallelism(Args, ToolChain.getDriver()))
+  StringRef Parallelism = getLTOParallelism(Args, ToolChain.getDriver());
+  if (!Parallelism.empty())
     CmdArgs.push_back(
         Args.MakeArgString("-plugin-opt=jobs=" + Twine(Parallelism)));
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits