sfantao created this revision.
sfantao added reviewers: ABataev, jlebar, tra, echristo, hfinkel.
sfantao added subscribers: caomhin, carlo.bertolli, arpith-jacob, cfe-commits.
This patch proposes a new class to generate and record action dependences related to offloading. The builder provides three main functionalities:

- Add device dependences to host actions.
- Add a host dependence to device actions.
- Register device top-level actions.

The constructor of the builder detects the programming models that should be supported and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented (see the illustrative sketch after the diff below). When a specialized builder is generated, it produces programming-model-specific diagnostics.

A CUDA specialized builder is proposed in this patch; it mostly consists of partitioning the current `buildCudaActions` into the three functionalities above.

http://reviews.llvm.org/D18172

Files:
  lib/Driver/Driver.cpp
Index: lib/Driver/Driver.cpp
===================================================================
--- lib/Driver/Driver.cpp
+++ lib/Driver/Driver.cpp
@@ -1342,128 +1342,425 @@
   }
 }
 
-// For each unique --cuda-gpu-arch= argument creates a TY_CUDA_DEVICE
-// input action and then wraps each in CudaDeviceAction paired with
-// appropriate GPU arch name. In case of partial (i.e preprocessing
-// only) or device-only compilation, each device action is added to /p
-// Actions and /p Current is released. Otherwise the function creates
-// and returns a new CudaHostAction which wraps /p Current and device
-// side actions.
-static Action *buildCudaActions(Compilation &C, DerivedArgList &Args,
-                                const Arg *InputArg, Action *HostAction,
-                                ActionList &Actions) {
-  Arg *PartialCompilationArg = Args.getLastArg(options::OPT_cuda_host_only,
-                                               options::OPT_cuda_device_only);
-  // Host-only compilation case.
-  if (PartialCompilationArg &&
-      PartialCompilationArg->getOption().matches(options::OPT_cuda_host_only)) {
-    OffloadAction::HostDependence HDep(
-        HostAction, C.getOffloadingHostToolChain(), /*BoundArch=*/nullptr,
-        Action::OFFLOAD_CUDA);
-    return C.MakeAction<OffloadAction>(HDep);
-  }
-
-  // Collect all cuda_gpu_arch parameters, removing duplicates.
-  SmallVector<const char *, 4> GpuArchList;
-  llvm::StringSet<> GpuArchNames;
-  for (Arg *A : Args) {
-    if (!A->getOption().matches(options::OPT_cuda_gpu_arch_EQ))
-      continue;
-    A->claim();
+namespace {
+/// \brief Provides a convenient interface for different programming models to
+/// generate the required device actions.
+class OffloadingActionBuilder {
+  /// \brief Flag used to trace errors in the builder.
+  bool IsValid;
+
+  /// \brief The compilation that is using this builder.
+  Compilation &C;
+
+  /// \brief The derived arguments associated with this builder.
+  DerivedArgList &Args;
+
+  /// \brief Builder interface. It doesn't build anything or keep any state.
+  class DeviceActionBuilder {
+  public:
+    typedef llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PhasesTy;
+
+  protected:
+    /// \brief Compilation associated with this builder.
+    Compilation &C;
+
+    /// \brief Tool chains associated with this builder. The same programming
+    /// model may have one or more associated tool chains.
+    SmallVector<const ToolChain *, 2> ToolChains;
+
+    /// \brief The derived arguments associated with this builder.
+    DerivedArgList &Args;
+
+    /// \brief The inputs associated with this builder.
+    const Driver::InputList &Inputs;
+
+  public:
+    DeviceActionBuilder(Compilation &C, DerivedArgList &Args,
+                        const Driver::InputList &Inputs)
+        : C(C), Args(Args), Inputs(Inputs) {}
+    virtual ~DeviceActionBuilder() {}
+
+    // \brief Fill up the array \a DA with all the device dependences that
+    // should be added to the provided host action \a HostAction. If the host
+    // action is to be ignored or there is any error, return true.
+    virtual bool getDeviceDepences(OffloadAction::DeviceDependences &DA,
+                                   Action *HostAction, types::ID InputType,
+                                   const Arg *InputArg, phases::ID CurPhase,
+                                   phases::ID FinalPhase, PhasesTy &Phases) {
+      return false;
+    }
 
-    const auto& Arch = A->getValue();
-    if (!toolchains::CudaToolChain::GpuArchToComputeName(Arch))
-      C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << Arch;
-    else if (GpuArchNames.insert(Arch).second)
-      GpuArchList.push_back(Arch);
-  }
-
-  // Default to sm_20 which is the lowest common denominator for supported GPUs.
-  // sm_20 code should work correctly, if suboptimally, on all newer GPUs.
-  if (GpuArchList.empty())
-    GpuArchList.push_back("sm_20");
-
-  // Replicate inputs for each GPU architecture.
-  Driver::InputList CudaDeviceInputs;
-  for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
-    CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg));
-
-  const ToolChain *CudaTC =
-      C.getSingleOffloadDeviceToolChain<Action::OFFLOAD_CUDA>();
-
-  // Build actions for all device inputs.
-  assert(CudaTC && "Missing toolchain for device-side compilation.");
-  ActionList CudaDeviceActions;
-  C.getDriver().BuildActions(C, Args, CudaDeviceInputs, CudaDeviceActions);
-  assert(GpuArchList.size() == CudaDeviceActions.size() &&
-         "Failed to create actions for all devices");
-
-  // Check whether any of device actions stopped before they could generate PTX.
-  bool PartialCompilation =
-      llvm::any_of(CudaDeviceActions, [](const Action *a) {
-        return a->getKind() != Action::AssembleJobClass;
-      });
-
-  // Figure out what to do with device actions -- pass them as inputs to the
-  // host action or run each of them independently.
-  bool DeviceOnlyCompilation = PartialCompilationArg != nullptr;
-  if (PartialCompilation || DeviceOnlyCompilation) {
-    // In case of partial or device-only compilation results of device actions
-    // are not consumed by the host action device actions have to be added to
-    // top-level actions list with AtTopLevel=true and run independently.
-
-    // -o is ambiguous if we have more than one top-level action.
-    if (Args.hasArg(options::OPT_o) &&
-        (!DeviceOnlyCompilation || GpuArchList.size() > 1)) {
-      C.getDriver().Diag(
-          clang::diag::err_drv_output_argument_with_multiple_files);
-      return nullptr;
+    // \brief Update the state to include the provided host action
+    // \a HostAction as a dependency of the current device action. Return
+    // true if an error was found.
+    virtual bool addDeviceDepences(Action *HostAction, types::ID InputType,
+                                   const Arg *InputArg) {
+      return false;
     }
 
-    for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
-      OffloadAction::DeviceDependences DDep;
-      DDep.add(CudaDeviceActions[I], CudaTC, GpuArchList[I],
+    // \brief Append top level actions generated by the builder. Return true
+    // if errors were found.
+    virtual bool appendTopLevelActions(ActionList &AL) { return false; }
+
+    // \brief Initialize the builder. Return true if any initialization
+    // errors are found.
+    virtual bool initialize() { return false; }
+
+    // \brief Return true if this builder is valid. We have a valid builder
+    // if we have associated device tool chains.
+    bool isValid() { return !ToolChains.empty(); }
+  };
+
+  /// \brief CUDA action builder. It injects device code in the host backend
+  /// action.
+  class CudaActionBuilder : public DeviceActionBuilder {
+    /// \brief Flags to signal if the user requested host-only or device-only
+    /// compilation.
+    bool IsHostOnlyCompilation = false;
+    bool IsDeviceOnlyCompilation = false;
+
+    /// \brief List of GPU architectures to use in this compilation.
+    SmallVector<const char *, 4> GpuArchList;
+
+    /// \brief The CUDA actions for the current input.
+    ActionList CudaDeviceActions;
+
+    /// \brief The CUDA fat binary if it was generated for the current input.
+    Action *CudaFatBinary = nullptr;
+
+  public:
+    CudaActionBuilder(Compilation &C, DerivedArgList &Args,
+                      const Driver::InputList &Inputs)
+        : DeviceActionBuilder(C, Args, Inputs) {}
+
+    bool getDeviceDepences(OffloadAction::DeviceDependences &DA,
+                           Action *HostAction, types::ID InputType,
+                           const Arg *InputArg, phases::ID CurPhase,
+                           phases::ID FinalPhase, PhasesTy &Phases) override {
+      // If we are in host-only mode and dealing with a CUDA input, we
+      // generate an empty dependency to mark the compile host action with
+      // the CUDA kind.
+      if (IsHostOnlyCompilation && InputType == types::TY_CUDA &&
+          CurPhase == phases::Backend) {
+        DA.add(/*A=*/nullptr, ToolChains.front(), /*BoundArch=*/nullptr,
                Action::OFFLOAD_CUDA);
-        Actions.push_back(
-            C.MakeAction<OffloadAction>(DDep, CudaDeviceActions[I]->getType()));
+        return false;
+      }
+
+      // If we don't have any CUDA actions, we don't have any dependences to
+      // create for the host.
+      if (CudaDeviceActions.empty())
+        return false;
+
+      assert(CudaDeviceActions.size() == GpuArchList.size() &&
+             "Expecting one action per GPU architecture.");
+      assert(!IsHostOnlyCompilation &&
+             "Not expecting CUDA actions in host-only compilation.");
+
+      // If we are generating code for the device or we are in a backend
+      // phase, we attempt to generate the fat binary. We compile each arch
+      // to ptx and assemble to cubin, then feed the cubin *and* the ptx into
+      // a device "link" action, which uses fatbinary to combine these cubins
+      // into one fatbin. The fatbin is then an input to the host action if
+      // not in device-only mode.
+      if (IsDeviceOnlyCompilation || CurPhase == phases::Backend) {
+        ActionList DeviceActions;
+        for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
+          // Produce the device action from the current phase up to the
+          // assemble phase.
+          for (auto Ph : Phases) {
+            // Skip the phases that were already dealt with.
+            if (Ph < CurPhase)
+              continue;
+            // We have to be consistent with the host final phase.
+            if (Ph > FinalPhase)
+              break;
+
+            CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction(
+                C, Args, Ph, CudaDeviceActions[I]);
+
+            if (Ph == phases::Assemble)
+              break;
+          }
+
+          // If we didn't reach the assemble phase, we can't generate the fat
+          // binary.
+          if (!isa<AssembleJobAction>(CudaDeviceActions[I]))
+            continue;
+
+          Action *AssembleAction = CudaDeviceActions[I];
+          assert(AssembleAction->getType() == types::TY_Object);
+          assert(AssembleAction->getInputs().size() == 1);
+
+          Action *BackendAction = AssembleAction->getInputs()[0];
+          assert(BackendAction->getType() == types::TY_PP_Asm);
+
+          for (auto &A : {AssembleAction, BackendAction}) {
+            OffloadAction::DeviceDependences DDep;
+            DDep.add(A, ToolChains.front(), GpuArchList[I],
+                     Action::OFFLOAD_CUDA);
+            DeviceActions.push_back(
+                C.MakeAction<OffloadAction>(DDep, A->getType()));
+          }
+        }
+
+        // We generate the fat binary if we have input device actions.
+        if (!DeviceActions.empty()) {
+          CudaFatBinary =
+              C.MakeAction<LinkJobAction>(DeviceActions, types::TY_CUDA_FATBIN);
+
+          if (!IsDeviceOnlyCompilation) {
+            DA.add(CudaFatBinary, ToolChains.front(), /*BoundArch=*/nullptr,
+                   Action::OFFLOAD_CUDA);
+            // Clear the fat binary; it is already a dependence of a host
+            // action.
+            CudaFatBinary = nullptr;
+          }
+
+          // Remove the CUDA actions as they are already connected to a host
+          // action or fat binary.
+          CudaDeviceActions.clear();
+        }
+
+        // We return IsDeviceOnlyCompilation to avoid creating a host action
+        // in device-only mode.
+        return IsDeviceOnlyCompilation;
+      }
+
+      assert(CurPhase < phases::Backend && "Generating single CUDA "
+                                           "instructions should only occur "
+                                           "before the backend phase!");
+
+      // Produce an action for each device arch.
+      for (Action *&A : CudaDeviceActions)
+        A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
+
+      return false;
+    }
+
+    bool addDeviceDepences(Action *HostAction, types::ID InputType,
+                           const Arg *InputArg) override {
+      // While generating code for CUDA, we only depend on the host input
+      // action to trigger the creation of all the CUDA device actions.
+
+      // If we are dealing with an input action, replicate it for each GPU
+      // architecture, unless we are in host-only mode.
+      if (isa<InputAction>(HostAction) && !IsHostOnlyCompilation) {
+        assert(!GpuArchList.empty() &&
+               "We should have at least one GPU architecture.");
+
+        // If the host input is not CUDA, we don't need to bother about this
+        // input.
+        if (InputType != types::TY_CUDA)
+          return false;
+
+        // Replicate inputs for each GPU architecture.
+        for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
+          CudaDeviceActions.push_back(
+              C.MakeAction<InputAction>(*InputArg, types::TY_CUDA_DEVICE));
+      }
+      return false;
+    }
+
+    bool appendTopLevelActions(ActionList &AL) override {
+      // Utility to append actions to the top level list.
+      auto AddTopLevel = [&](Action *A, const char *BoundArch) {
+        OffloadAction::DeviceDependences Dep;
+        Dep.add(A, ToolChains.front(), BoundArch, Action::OFFLOAD_CUDA);
+        AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
+      };
+
+      // If we have a fat binary, add it to the list.
+      if (CudaFatBinary) {
+        AddTopLevel(CudaFatBinary, /*BoundArch=*/nullptr);
+        CudaDeviceActions.clear();
+        CudaFatBinary = nullptr;
+        return false;
+      }
+
+      if (CudaDeviceActions.empty())
+        return false;
+
+      // If we have CUDA actions at this point, that's because we have a
+      // partial compilation, so we should have an action for each GPU
+      // architecture.
+      assert(CudaDeviceActions.size() == GpuArchList.size() &&
+             "Expecting one action per GPU architecture.");
+      assert(ToolChains.size() == 1 &&
+             "Expecting to have a single CUDA toolchain.");
+      for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
+        AddTopLevel(CudaDeviceActions[I], GpuArchList[I]);
+
+      CudaDeviceActions.clear();
+      return false;
+    }
+
+    bool initialize() override {
+      bool Error = false;
+
+      const auto *CudaTC =
+          C.getSingleOffloadDeviceToolChain<Action::OFFLOAD_CUDA>();
+
+      // We don't need to support CUDA.
+      if (!CudaTC)
+        return Error;
+
+      ToolChains.push_back(CudaTC);
+
+      Arg *PartialCompilationArg = Args.getLastArg(
+          options::OPT_cuda_host_only, options::OPT_cuda_device_only);
+
+      IsHostOnlyCompilation = PartialCompilationArg &&
+                              PartialCompilationArg->getOption().matches(
+                                  options::OPT_cuda_host_only);
+
+      IsDeviceOnlyCompilation = PartialCompilationArg && !IsHostOnlyCompilation;
+
+      // Collect all cuda_gpu_arch parameters, removing duplicates.
+      llvm::StringSet<> GpuArchNames;
+      for (Arg *A : Args) {
+        if (!A->getOption().matches(options::OPT_cuda_gpu_arch_EQ))
+          continue;
+        A->claim();
+
+        const auto &Arch = A->getValue();
+        if (!toolchains::CudaToolChain::GpuArchToComputeName(Arch)) {
+          C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << Arch;
+          Error = true;
+        } else if (GpuArchNames.insert(Arch).second)
+          GpuArchList.push_back(Arch);
+      }
+
+      // Default to sm_20, which is the lowest common denominator for
+      // supported GPUs. sm_20 code should work correctly, if suboptimally,
+      // on all newer GPUs.
+      if (GpuArchList.empty())
+        GpuArchList.push_back("sm_20");
+
+      return Error;
     }
-  // Kill host action in case of device-only compilation.
-  if (DeviceOnlyCompilation)
+  };
+
+  /// Add the implementation for other specialized builders here.
+
+  /// \brief Specialized builders being used by this offloading action builder.
+  SmallVector<DeviceActionBuilder *, 4> SpecializedBuilders;
+
+public:
+  OffloadingActionBuilder(Compilation &C, DerivedArgList &Args,
+                          const Driver::InputList &Inputs)
+      : C(C), Args(Args) {
+    // Create a specialized builder for each device toolchain.
+
+    IsValid = true;
+
+    // Create a specialized builder for CUDA.
+    SpecializedBuilders.push_back(new CudaActionBuilder(C, Args, Inputs));
+
+    //
+    // Build other specialized builders here.
+    //
+
+    // Initialize all the builders, keeping track of errors.
+    for (auto *SB : SpecializedBuilders)
+      IsValid = IsValid && !SB->initialize();
+  }
+
+  ~OffloadingActionBuilder() {
+    for (auto *SB : SpecializedBuilders)
+      delete SB;
+  }
+
+  /// \brief Generate an action that adds device dependences (if any) to a
+  /// host action. If no dependence actions exist, just return the host
+  /// action \a HostAction. If an error is found or if no builder requires
+  /// the host toolchain, return nullptr.
+  Action *
+  addDeviceDependencesToHostAction(Action *HostAction, types::ID InputType,
+                                   const Arg *InputArg, phases::ID CurPhase,
+                                   phases::ID FinalPhase,
+                                   DeviceActionBuilder::PhasesTy &Phases) {
+    if (!IsValid)
       return nullptr;
-    return HostAction;
-  }
-
-  // If we're not a partial or device-only compilation, we compile each arch to
-  // ptx and assemble to cubin, then feed the cubin *and* the ptx into a device
-  // "link" action, which uses fatbinary to combine these cubins into one
-  // fatbin. The fatbin is then an input to the host compilation.
-  ActionList DeviceActions;
-  for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
-    Action* AssembleAction = CudaDeviceActions[I];
-    assert(AssembleAction->getType() == types::TY_Object);
-    assert(AssembleAction->getInputs().size() == 1);
-
-    Action* BackendAction = AssembleAction->getInputs()[0];
-    assert(BackendAction->getType() == types::TY_PP_Asm);
-
-    for (auto &A : {AssembleAction, BackendAction}) {
-      OffloadAction::DeviceDependences DDep;
-      DDep.add(A, CudaTC, GpuArchList[I], Action::OFFLOAD_CUDA);
-      DeviceActions.push_back(C.MakeAction<OffloadAction>(DDep, A->getType()));
-    }
-  }
-  auto FatbinAction =
-      C.MakeAction<LinkJobAction>(DeviceActions, types::TY_CUDA_FATBIN);
-
-  // Return a new host action that incorporates original host action and all
-  // device actions.
-  OffloadAction::HostDependence HDep(HostAction, C.getOffloadingHostToolChain(),
-                                     /*BoundArch=*/nullptr,
-                                     Action::OFFLOAD_CUDA);
-  OffloadAction::DeviceDependences DDep;
-  DDep.add(FatbinAction, CudaTC, /*BoundArch=*/nullptr, Action::OFFLOAD_CUDA);
-  return C.MakeAction<OffloadAction>(HDep, DDep);
-}
+
+    if (SpecializedBuilders.empty())
+      return HostAction;
+
+    assert(HostAction && "Invalid host action!");
+
+    OffloadAction::DeviceDependences DDeps;
+    // Check if all the programming models agree we should not emit the host
+    // action.
+    bool DoesNotRequireHostAction = true;
+    for (auto *SB : SpecializedBuilders) {
+      if (!SB->isValid()) {
+        DoesNotRequireHostAction = false;
+        continue;
+      }
+      DoesNotRequireHostAction =
+          DoesNotRequireHostAction &&
+          SB->getDeviceDepences(DDeps, HostAction, InputType, InputArg,
+                                CurPhase, FinalPhase, Phases);
+    }
+
+    if (DoesNotRequireHostAction)
+      return nullptr;
+
+    if (DDeps.getActions().empty())
+      return HostAction;
+
+    // We have dependences we need to bundle together. We use an offload
+    // action for that.
+    OffloadAction::HostDependence HDep(HostAction,
+                                       C.getOffloadingHostToolChain(),
+                                       /*BoundArch=*/nullptr, DDeps);
+    return C.MakeAction<OffloadAction>(HDep, DDeps);
+  }
+
+  /// \brief Generate an action that adds a host dependence to a device
+  /// action. The results will be kept in this action builder. Return true
+  /// if an error was found.
+  bool addHostDependenceToDeviceActions(Action *HostAction, types::ID InputType,
+                                        const Arg *InputArg) {
+    if (!IsValid)
+      return true;
+
+    assert(HostAction && "Invalid host action!");
+
+    bool Error = false;
+    for (auto *SB : SpecializedBuilders) {
+      if (!SB->isValid())
+        continue;
+      Error = Error || SB->addDeviceDepences(HostAction, InputType, InputArg);
+    }
+    return Error;
+  }
+
+  /// \brief Add the offloading top level actions to the provided action list.
+  bool appendTopLevelActions(ActionList &AL) {
+    bool Error = false;
+
+    auto NumActions = AL.size();
+
+    for (auto *SB : SpecializedBuilders) {
+      if (!SB->isValid())
+        continue;
+      Error = Error || SB->appendTopLevelActions(AL);
+    }
+
+    assert(NumActions <= AL.size() && "Expecting more actions, not fewer!");
+
+    // If any action was added by the builders, -o is ambiguous if we have
+    // more than one top-level action.
+    if (NumActions < AL.size() && Args.hasArg(options::OPT_o) &&
+        AL.size() > 1) {
+      C.getDriver().Diag(
+          clang::diag::err_drv_output_argument_with_multiple_files);
+      return true;
+    }
+
+    return Error;
+  }
+};
+} // anonymous namespace.
 
 void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
                           const InputList &Inputs, ActionList &Actions) const {
@@ -1571,6 +1868,9 @@
     YcArg = YuArg = nullptr;
   }
 
+  // Builder to be used to build offloading actions.
+  OffloadingActionBuilder OffloadBuilder(C, Args, Inputs);
+
   // Construct the actions to perform.
   ActionList LinkerInputs;
 
@@ -1633,52 +1933,63 @@
       continue;
     }
 
-    phases::ID CudaInjectionPhase =
-        (phases::Compile < FinalPhase &&
-         llvm::find(PL, phases::Compile) != PL.end())
-            ? phases::Compile
-            : FinalPhase;
-
     // Build the pipeline for this file.
     Action *Current = C.MakeAction<InputAction>(*InputArg, InputType);
+
+    // Use the current host action in any of the offloading actions, if
+    // required.
+    if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputType,
+                                                        InputArg))
+      break;
+
     for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end();
          i != e; ++i) {
       phases::ID Phase = *i;
 
       // We are done if this step is past what the user requested.
       if (Phase > FinalPhase)
         break;
 
+      // Add any offload action the host action depends on.
+      Current = OffloadBuilder.addDeviceDependencesToHostAction(
+          Current, InputType, InputArg, Phase, FinalPhase, PL);
+      if (!Current)
+        break;
+
       // Queue linker inputs.
       if (Phase == phases::Link) {
        assert((i + 1) == e && "linking must be final compilation step.");
         LinkerInputs.push_back(Current);
         Current = nullptr;
         break;
       }
 
-      // Some types skip the assembler phase (e.g., llvm-bc), but we can't
-      // encode this in the steps because the intermediate type depends on
-      // arguments. Just special case here.
-      if (Phase == phases::Assemble && Current->getType() != types::TY_PP_Asm)
+      // Otherwise construct the appropriate action.
+      auto *NewCurrent = ConstructPhaseAction(C, Args, Phase, Current);
+
+      // We didn't create a new action, so we will just move to the next
+      // phase.
+      if (NewCurrent == Current)
         continue;
 
-      // Otherwise construct the appropriate action.
-      Current = ConstructPhaseAction(C, Args, Phase, Current);
+      Current = NewCurrent;
 
-      if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase) {
-        Current = buildCudaActions(C, Args, InputArg, Current, Actions);
-        if (!Current)
-          break;
-      }
+      // Use the current host action in any of the offloading actions, if
+      // required.
+      if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputType,
+                                                          InputArg))
+        break;
 
       if (Current->getType() == types::TY_Nothing)
         break;
     }
 
     // If we ended with something, add to the output list.
    if (Current)
       Actions.push_back(Current);
+
+    // Add any top level actions generated for offloading.
+    OffloadBuilder.appendTopLevelActions(Actions);
   }
 
   // Add a link action if necessary.
@@ -1704,6 +2015,13 @@
 Action *Driver::ConstructPhaseAction(Compilation &C, const ArgList &Args,
                                      phases::ID Phase, Action *Input) const {
   llvm::PrettyStackTraceString CrashInfo("Constructing phase actions");
+
+  // Some types skip the assembler phase (e.g., llvm-bc), but we can't
+  // encode this in the steps because the intermediate type depends on
+  // arguments. Just special case here.
+  if (Phase == phases::Assemble && Input->getType() != types::TY_PP_Asm)
+    return Input;
+
   // Build the appropriate action.
   switch (Phase) {
   case phases::Link:
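To make the extension point concrete, here is a minimal sketch of how a hypothetical additional programming model would plug into the builder. The OpenMPActionBuilder name and its body are purely illustrative and not part of this patch; only the DeviceActionBuilder hooks it overrides are the ones declared above:

  /// Hypothetical specialized builder for another offloading model. It only
  /// needs to subclass DeviceActionBuilder and override the hooks it cares
  /// about; the remaining hooks keep their no-op default implementations.
  class OpenMPActionBuilder : public DeviceActionBuilder {
  public:
    OpenMPActionBuilder(Compilation &C, DerivedArgList &Args,
                        const Driver::InputList &Inputs)
        : DeviceActionBuilder(C, Args, Inputs) {}

    bool initialize() override {
      // Push the device tool chains this model was configured with onto
      // ToolChains and emit any model-specific diagnostics here. Leaving
      // ToolChains empty marks the builder invalid (see isValid()), and
      // returning true signals an initialization error.
      return false;
    }
  };

The only other change needed would be a single registration line in the OffloadingActionBuilder constructor, next to the CUDA one:

    SpecializedBuilders.push_back(new OpenMPActionBuilder(C, Args, Inputs));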