Re: [PATCH] D13144: [CUDA] propagate to CUDA sub-compilations target triple of opposite side.

Artem Belevich via cfe-commits Fri, 13 Nov 2015 10:42:37 -0800

tra updated this revision to Diff 40163.
tra added a comment.

Added a FIXME note regarding figuring out which compilation pass we're 
constructing.
Undone the change to make getToolchain public as it's no longer accessed from 
outside of Driver.
Converted the if(AuxToolchain) into assert.



http://reviews.llvm.org/D13144

Files:
  include/clang/Driver/Action.h
  include/clang/Driver/Compilation.h
  include/clang/Driver/Driver.h
  lib/Driver/Action.cpp
  lib/Driver/Compilation.cpp
  lib/Driver/Driver.cpp
  lib/Driver/Tools.cpp
  test/Driver/cuda-options.cu
  test/SemaCUDA/function-target-hd.cu

Index: test/SemaCUDA/function-target-hd.cu
===================================================================
--- test/SemaCUDA/function-target-hd.cu
+++ test/SemaCUDA/function-target-hd.cu
@@ -8,9 +8,9 @@
 // host device functions are not allowed to call device functions.
 
 // RUN: %clang_cc1 -fsyntax-only -verify %s
-// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -triple nvptx-unknown-cuda -verify %s
 // RUN: %clang_cc1 -fsyntax-only -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD
-// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -triple nvptx-unknown-cuda -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD
 
 #include "Inputs/cuda.h"
 
Index: test/Driver/cuda-options.cu
===================================================================
--- test/Driver/cuda-options.cu
+++ test/Driver/cuda-options.cu
@@ -111,14 +111,6 @@
 // Make sure we don't link anything.
 // RUN:   -check-prefix CUDA-NL %s
 
-// Match device-side preprocessor, and compiler phases with -save-temps
-// CUDA-D1S: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
-// CUDA-D1S-SAME: "-fcuda-is-device"
-// CUDA-D1S-SAME: "-x" "cuda"
-// CUDA-D1S: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
-// CUDA-D1S-SAME: "-fcuda-is-device"
-// CUDA-D1S-SAME: "-x" "cuda-cpp-output"
-
 // --cuda-host-only should never trigger unused arg warning.
 // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only -c %s 2>&1 | \
 // RUN:    FileCheck -check-prefix CUDA-NO-UNUSED-CHO %s
@@ -133,34 +125,47 @@
 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only -x c -c %s 2>&1 | \
 // RUN:    FileCheck -check-prefix CUDA-UNUSED-CDO %s
 
+// Match device-side preprocessor, and compiler phases with -save-temps
+// CUDA-D1S: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D1S-SAME: "-aux-triple" "x86_64--linux-gnu"
+// CUDA-D1S-SAME: "-fcuda-is-device"
+// CUDA-D1S-SAME: "-x" "cuda"
+
+// CUDA-D1S: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D1S-SAME: "-aux-triple" "x86_64--linux-gnu"
+// CUDA-D1S-SAME: "-fcuda-is-device"
+// CUDA-D1S-SAME: "-x" "cuda-cpp-output"
+
 // Match the job that produces PTX assembly
-// CUDA-D1: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-D1: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D1NS-SAME: "-aux-triple" "x86_64--linux-gnu"
 // CUDA-D1-SAME: "-fcuda-is-device"
 // CUDA-D1-SM35-SAME: "-target-cpu" "sm_35"
 // CUDA-D1-SAME: "-o" "[[GPUBINARY1:[^"]*]]"
 // CUDA-D1NS-SAME: "-x" "cuda"
 // CUDA-D1S-SAME: "-x" "ir"
 
-// Match anothe device-side compilation
-// CUDA-D2: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
+// Match another device-side compilation
+// CUDA-D2: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D2-SAME: "-aux-triple" "x86_64--linux-gnu"
 // CUDA-D2-SAME: "-fcuda-is-device"
 // CUDA-D2-SM30-SAME: "-target-cpu" "sm_30"
 // CUDA-D2-SAME: "-o" "[[GPUBINARY2:[^"]*]]"
 // CUDA-D2-SAME: "-x" "cuda"
 
 // Match no device-side compilation
-// CUDA-ND-NOT: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-ND-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 // CUDA-ND-SAME-NOT: "-fcuda-is-device"
 
 // Match host-side preprocessor job with -save-temps
-// CUDA-HS: "-cc1" "-triple"
-// CUDA-HS-SAME-NOT: "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-HS: "-cc1" "-triple" "x86_64--linux-gnu"
+// CUDA-HS-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
 // CUDA-HS-SAME-NOT: "-fcuda-is-device"
 // CUDA-HS-SAME: "-x" "cuda"
 
 // Match host-side compilation
-// CUDA-H: "-cc1" "-triple"
-// CUDA-H-SAME-NOT: "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-H: "-cc1" "-triple" "x86_64--linux-gnu"
+// CUDA-H-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
 // CUDA-H-SAME-NOT: "-fcuda-is-device"
 // CUDA-H-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]"
 // CUDA-HNS-SAME: "-x" "cuda"
Index: lib/Driver/Tools.cpp
===================================================================
--- lib/Driver/Tools.cpp
+++ lib/Driver/Tools.cpp
@@ -3264,6 +3264,23 @@
   CmdArgs.push_back("-triple");
   CmdArgs.push_back(Args.MakeArgString(TripleStr));
 
+  if (IsCuda) {
+    // FIXME: We need a (better) way to pass information about
+    // particular compilation pass we're constructing here. For now we
+    // can check which toolchain we're using and pick the other one to
+    // extract the triple.
+    const ToolChain *AuxToolChain;
+    if (&getToolChain() == C.getCudaDeviceToolChain())
+      AuxToolChain = C.getCudaHostToolChain();
+    else if (&getToolChain() == C.getCudaHostToolChain())
+      AuxToolChain = C.getCudaDeviceToolChain();
+    else
+      llvm_unreachable("Can't figure out CUDA compilation mode.");
+    assert(AuxToolChain != nullptr && "No aux toolchain.");
+    CmdArgs.push_back("-aux-triple");
+    CmdArgs.push_back(Args.MakeArgString(AuxToolChain->getTriple().str()));
+  }
+
   if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm ||
                                Triple.getArch() == llvm::Triple::thumb)) {
     unsigned Offset = Triple.getArch() == llvm::Triple::arm ? 4 : 6;
Index: lib/Driver/Driver.cpp
===================================================================
--- lib/Driver/Driver.cpp
+++ lib/Driver/Driver.cpp
@@ -491,6 +491,10 @@
   // The compilation takes ownership of Args.
   Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs);
 
+  C->setCudaDeviceToolChain(
+      &getToolChain(C->getArgs(), llvm::Triple(TC.getTriple().isArch64Bit()
+                                                   ? "nvptx64-nvidia-cuda"
+                                                   : "nvptx-nvidia-cuda")));
   if (!HandleImmediateArgs(*C))
     return C;
 
@@ -501,10 +505,9 @@
   // Construct the list of abstract actions to perform for this compilation. On
   // MachO targets this uses the driver-driver and universal actions.
   if (TC.getTriple().isOSBinFormatMachO())
-    BuildUniversalActions(C->getDefaultToolChain(), C->getArgs(), Inputs,
-                          C->getActions());
+    BuildUniversalActions(*C, C->getDefaultToolChain(), Inputs);
   else
-    BuildActions(C->getDefaultToolChain(), C->getArgs(), Inputs,
+    BuildActions(*C, C->getDefaultToolChain(), C->getArgs(), Inputs,
                  C->getActions());
 
   if (CCCPrintPhases) {
@@ -617,9 +620,9 @@
   // Darwin OSes this uses the driver-driver and builds universal actions.
   const ToolChain &TC = C.getDefaultToolChain();
   if (TC.getTriple().isOSBinFormatMachO())
-    BuildUniversalActions(TC, C.getArgs(), Inputs, C.getActions());
+    BuildUniversalActions(C, TC, Inputs);
   else
-    BuildActions(TC, C.getArgs(), Inputs, C.getActions());
+    BuildActions(C, TC, C.getArgs(), Inputs, C.getActions());
 
   BuildJobs(C);
 
@@ -948,7 +951,7 @@
     os << '"' << CDA->getGpuArchName() << '"' << ", {"
        << PrintActions1(C, *CDA->begin(), Ids) << "}";
   } else {
-    ActionList *AL;
+    const ActionList *AL;
     if (CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) {
       os << "{" << PrintActions1(C, *CHA->begin(), Ids) << "}"
          << ", gpu binaries ";
@@ -997,9 +1000,10 @@
   return false;
 }
 
-void Driver::BuildUniversalActions(const ToolChain &TC, DerivedArgList &Args,
-                                   const InputList &BAInputs,
-                                   ActionList &Actions) const {
+void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
+                                   const InputList &BAInputs) const {
+  DerivedArgList &Args = C.getArgs();
+  ActionList &Actions = C.getActions();
   llvm::PrettyStackTraceString CrashInfo("Building universal build actions");
   // Collect the list of architectures. Duplicates are allowed, but should only
   // be handled once (in the order seen).
@@ -1028,7 +1032,7 @@
     Archs.push_back(Args.MakeArgString(TC.getDefaultUniversalArchName()));
 
   ActionList SingleActions;
-  BuildActions(TC, Args, BAInputs, SingleActions);
+  BuildActions(C, TC, Args, BAInputs, SingleActions);
 
   // Add in arch bindings for every top level action, as well as lipo and
   // dsymutil steps if needed.
@@ -1279,21 +1283,15 @@
 // and returns a new CudaHostAction which wraps /p Current and device
 // side actions.
 static std::unique_ptr<Action>
-buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args,
-                 const Arg *InputArg, std::unique_ptr<Action> HostAction,
-                 ActionList &Actions) {
-  // Figure out which NVPTX triple to use for device-side compilation based on
-  // whether host is 64-bit.
-  const char *DeviceTriple = TC.getTriple().isArch64Bit()
-                                 ? "nvptx64-nvidia-cuda"
-                                 : "nvptx-nvidia-cuda";
+buildCudaActions(Compilation &C, DerivedArgList &Args, const Arg *InputArg,
+                 std::unique_ptr<Action> HostAction, ActionList &Actions) {
   Arg *PartialCompilationArg = Args.getLastArg(options::OPT_cuda_host_only,
                                                options::OPT_cuda_device_only);
   // Host-only compilation case.
   if (PartialCompilationArg &&
       PartialCompilationArg->getOption().matches(options::OPT_cuda_host_only))
     return std::unique_ptr<Action>(
-        new CudaHostAction(std::move(HostAction), {}, DeviceTriple));
+        new CudaHostAction(std::move(HostAction), {}));
 
   // Collect all cuda_gpu_arch parameters, removing duplicates.
   SmallVector<const char *, 4> GpuArchList;
@@ -1317,8 +1315,11 @@
     CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg));
 
   // Build actions for all device inputs.
+  assert(C.getCudaDeviceToolChain() &&
+         "Missing toolchain for device-side compilation.");
   ActionList CudaDeviceActions;
-  D.BuildActions(TC, Args, CudaDeviceInputs, CudaDeviceActions);
+  C.getDriver().BuildActions(C, *C.getCudaDeviceToolChain(), Args,
+                             CudaDeviceInputs, CudaDeviceActions);
   assert(GpuArchList.size() == CudaDeviceActions.size() &&
          "Failed to create actions for all devices");
 
@@ -1342,14 +1343,15 @@
     // -o is ambiguous if we have more than one top-level action.
     if (Args.hasArg(options::OPT_o) &&
         (!DeviceOnlyCompilation || GpuArchList.size() > 1)) {
-      D.Diag(clang::diag::err_drv_output_argument_with_multiple_files);
+      C.getDriver().Diag(
+          clang::diag::err_drv_output_argument_with_multiple_files);
       return nullptr;
     }
 
     for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
       Actions.push_back(new CudaDeviceAction(
           std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I],
-          DeviceTriple, /* AtTopLevel */ true));
+          /* AtTopLevel */ true));
     // Kill host action in case of device-only compilation.
     if (DeviceOnlyCompilation)
       HostAction.reset(nullptr);
@@ -1362,15 +1364,16 @@
   for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
     DeviceActions.push_back(new CudaDeviceAction(
         std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I],
-        DeviceTriple, /* AtTopLevel */ false));
+        /* AtTopLevel */ false));
   // Return a new host action that incorporates original host action and all
   // device actions.
   return std::unique_ptr<Action>(
-      new CudaHostAction(std::move(HostAction), DeviceActions, DeviceTriple));
+      new CudaHostAction(std::move(HostAction), DeviceActions));
 }
 
-void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
-                          const InputList &Inputs, ActionList &Actions) const {
+void Driver::BuildActions(Compilation &C, const ToolChain &TC,
+                          DerivedArgList &Args, const InputList &Inputs,
+                          ActionList &Actions) const {
   llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
 
   if (!SuppressMissingInputWarning && Inputs.empty()) {
@@ -1500,8 +1503,8 @@
       Current = ConstructPhaseAction(TC, Args, Phase, std::move(Current));
 
       if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase) {
-        Current = buildCudaActions(*this, TC, Args, InputArg,
-                                   std::move(Current), Actions);
+        Current =
+            buildCudaActions(C, Args, InputArg, std::move(Current), Actions);
         if (!Current)
           break;
       }
@@ -1803,7 +1806,7 @@
     InputInfo II;
     // Append outputs of device jobs to the input list.
     for (const Action *DA : CHA->getDeviceActions()) {
-      BuildJobsForAction(C, DA, TC, "", AtTopLevel,
+      BuildJobsForAction(C, DA, TC, nullptr, AtTopLevel,
                          /*MultipleArchs*/ false, LinkingOutput, II);
       CudaDeviceInputInfos.push_back(II);
     }
@@ -1843,11 +1846,12 @@
   }
 
   if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) {
-    BuildJobsForAction(
-        C, *CDA->begin(),
-        &getToolChain(C.getArgs(), llvm::Triple(CDA->getDeviceTriple())),
-        CDA->getGpuArchName(), CDA->isAtTopLevel(),
-        /*MultipleArchs*/ true, LinkingOutput, Result);
+    // Initial processing of CudaDeviceAction carries host params.
+    // Call BuildJobsForAction() again, now with correct device parameters.
+    assert(CDA->getGpuArchName() && "No GPU name in device action.");
+    BuildJobsForAction(C, *CDA->begin(), C.getCudaDeviceToolChain(),
+                       CDA->getGpuArchName(), CDA->isAtTopLevel(),
+                       /*MultipleArchs*/ true, LinkingOutput, Result);
     return;
   }
 
Index: lib/Driver/Compilation.cpp
===================================================================
--- lib/Driver/Compilation.cpp
+++ lib/Driver/Compilation.cpp
@@ -24,8 +24,9 @@
 
 Compilation::Compilation(const Driver &D, const ToolChain &_DefaultToolChain,
                          InputArgList *_Args, DerivedArgList *_TranslatedArgs)
-    : TheDriver(D), DefaultToolChain(_DefaultToolChain), Args(_Args),
-      TranslatedArgs(_TranslatedArgs), Redirects(nullptr),
+    : TheDriver(D), DefaultToolChain(_DefaultToolChain),
+      CudaHostToolChain(&DefaultToolChain), CudaDeviceToolChain(nullptr),
+      Args(_Args), TranslatedArgs(_TranslatedArgs), Redirects(nullptr),
       ForDiagnostics(false) {}
 
 Compilation::~Compilation() {
Index: lib/Driver/Action.cpp
===================================================================
--- lib/Driver/Action.cpp
+++ lib/Driver/Action.cpp
@@ -58,18 +58,15 @@
 void CudaDeviceAction::anchor() {}
 
 CudaDeviceAction::CudaDeviceAction(std::unique_ptr<Action> Input,
-                                   const char *ArchName,
-                                   const char *DeviceTriple, bool AtTopLevel)
+                                   const char *ArchName, bool AtTopLevel)
     : Action(CudaDeviceClass, std::move(Input)), GpuArchName(ArchName),
-      DeviceTriple(DeviceTriple), AtTopLevel(AtTopLevel) {}
+      AtTopLevel(AtTopLevel) {}
 
 void CudaHostAction::anchor() {}
 
 CudaHostAction::CudaHostAction(std::unique_ptr<Action> Input,
-                               const ActionList &DeviceActions,
-                               const char *DeviceTriple)
-    : Action(CudaHostClass, std::move(Input)), DeviceActions(DeviceActions),
-      DeviceTriple(DeviceTriple) {}
+                               const ActionList &DeviceActions)
+    : Action(CudaHostClass, std::move(Input)), DeviceActions(DeviceActions) {}
 
 CudaHostAction::~CudaHostAction() {
   for (auto &DA : DeviceActions)
Index: include/clang/Driver/Driver.h
===================================================================
--- include/clang/Driver/Driver.h
+++ include/clang/Driver/Driver.h
@@ -297,22 +297,23 @@
   /// BuildActions - Construct the list of actions to perform for the
   /// given arguments, which are only done for a single architecture.
   ///
+  /// \param C - The compilation that is being built.
   /// \param TC - The default host tool chain.
   /// \param Args - The input arguments.
   /// \param Actions - The list to store the resulting actions onto.
-  void BuildActions(const ToolChain &TC, llvm::opt::DerivedArgList &Args,
-                    const InputList &Inputs, ActionList &Actions) const;
+  void BuildActions(Compilation &C, const ToolChain &TC,
+                    llvm::opt::DerivedArgList &Args, const InputList &Inputs,
+                    ActionList &Actions) const;
 
   /// BuildUniversalActions - Construct the list of actions to perform
   /// for the given arguments, which may require a universal build.
   ///
+  /// \param C - The compilation that is being built.
   /// \param TC - The default host tool chain.
   /// \param Args - The input arguments.
   /// \param Actions - The list to store the resulting actions onto.
-  void BuildUniversalActions(const ToolChain &TC,
-                             llvm::opt::DerivedArgList &Args,
-                             const InputList &BAInputs,
-                             ActionList &Actions) const;
+  void BuildUniversalActions(Compilation &C, const ToolChain &TC,
+                             const InputList &BAInputs) const;
 
   /// BuildJobs - Bind actions to concrete tools and translate
   /// arguments to form the list of jobs to run.
Index: include/clang/Driver/Compilation.h
===================================================================
--- include/clang/Driver/Compilation.h
+++ include/clang/Driver/Compilation.h
@@ -38,6 +38,9 @@
   /// The default tool chain.
   const ToolChain &DefaultToolChain;
 
+  const ToolChain *CudaHostToolChain;
+  const ToolChain *CudaDeviceToolChain;
+
   /// The original (untranslated) input argument list.
   llvm::opt::InputArgList *Args;
 
@@ -81,6 +84,17 @@
   const Driver &getDriver() const { return TheDriver; }
 
   const ToolChain &getDefaultToolChain() const { return DefaultToolChain; }
+  const ToolChain *getCudaHostToolChain() const { return CudaHostToolChain; }
+  const ToolChain *getCudaDeviceToolChain() const {
+    return CudaDeviceToolChain;
+  }
+
+  void setCudaHostToolChain(const ToolChain *HostToolChain) {
+    CudaHostToolChain = HostToolChain;
+  }
+  void setCudaDeviceToolChain(const ToolChain *DeviceToolChain) {
+    CudaDeviceToolChain = DeviceToolChain;
+  }
 
   const llvm::opt::InputArgList &getInputArgs() const { return *Args; }
 
Index: include/clang/Driver/Action.h
===================================================================
--- include/clang/Driver/Action.h
+++ include/clang/Driver/Action.h
@@ -139,17 +139,15 @@
   virtual void anchor();
   /// GPU architecture to bind -- e.g 'sm_35'.
   const char *GpuArchName;
-  const char *DeviceTriple;
   /// True when action results are not consumed by the host action (e.g when
   /// -fsyntax-only or --cuda-device-only options are used).
   bool AtTopLevel;
 
 public:
   CudaDeviceAction(std::unique_ptr<Action> Input, const char *ArchName,
-                   const char *DeviceTriple, bool AtTopLevel);
+                   bool AtTopLevel);
 
   const char *getGpuArchName() const { return GpuArchName; }
-  const char *getDeviceTriple() const { return DeviceTriple; }
   bool isAtTopLevel() const { return AtTopLevel; }
 
   static bool classof(const Action *A) {
@@ -160,16 +158,13 @@
 class CudaHostAction : public Action {
   virtual void anchor();
   ActionList DeviceActions;
-  const char *DeviceTriple;
 
 public:
-  CudaHostAction(std::unique_ptr<Action> Input, const ActionList &DeviceActions,
-                 const char *DeviceTriple);
+  CudaHostAction(std::unique_ptr<Action> Input,
+                 const ActionList &DeviceActions);
   ~CudaHostAction() override;
 
-  ActionList &getDeviceActions() { return DeviceActions; }
   const ActionList &getDeviceActions() const { return DeviceActions; }
-  const char *getDeviceTriple() const { return DeviceTriple; }
 
   static bool classof(const Action *A) { return A->getKind() == CudaHostClass; }
 };

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Re: [PATCH] D13144: [CUDA] propagate to CUDA sub-compilations target triple of opposite side.

Reply via email to