llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Joseph Huber (jhuber6) <details> <summary>Changes</summary> Summary: This is a new option that tries to make selecting offloading toolchains more generic. Currently we infer the toolchain from a combination of the kind and the `--offload-arch=` option. Doing this becomes complicated when we want to start supporting multiple targets for a single toolchain, e.g. HIP on SPIR-V or AMDGCN. Currently we hack in a special 'architecture' instead, which isn't extensible. Additionally, in the future we may accept compiling CUDA or HIP to some other architecture. --- Patch is 31.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/125556.diff 11 Files Affected: - (modified) clang/include/clang/Basic/DiagnosticDriverKinds.td (+2) - (modified) clang/include/clang/Driver/Driver.h (+5-13) - (modified) clang/include/clang/Driver/Options.td (+8-3) - (modified) clang/lib/Driver/Driver.cpp (+173-108) - (modified) clang/lib/Driver/ToolChain.cpp (+32-10) - (modified) clang/lib/Driver/ToolChains/Cuda.cpp (+1-1) - (modified) clang/lib/Driver/ToolChains/Cuda.h (+1-1) - (modified) clang/lib/Driver/ToolChains/Darwin.cpp (-24) - (modified) clang/test/Driver/Xarch.c (+8) - (added) clang/test/Driver/offload-Xarch.c (+31) - (added) clang/test/Driver/offload-target.c (+21) ``````````diff diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 8d599c96eb4fbf..94baa35e6a9ec7 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -119,6 +119,8 @@ def err_drv_cuda_host_arch : Error< "unsupported architecture '%0' for host compilation">; def err_drv_mix_cuda_hip : Error< "mixed CUDA and HIP compilation is not supported">; +def err_drv_mix_offload : Error< + "mixed %0 and %1 offloading compilation is not supported">; def err_drv_bad_target_id : Error< "invalid 
target ID '%0'; format is a processor name followed by an optional " "colon-delimited list of features followed by an enable/disable sign (e.g., " diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index f4a52cc529b79c..b463dc2a93550e 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -797,22 +797,14 @@ class Driver { const ToolChain &getToolChain(const llvm::opt::ArgList &Args, const llvm::Triple &Target) const; - /// @} - - /// Retrieves a ToolChain for a particular device \p Target triple - /// - /// \param[in] HostTC is the host ToolChain paired with the device - /// - /// \param[in] TargetDeviceOffloadKind (e.g. OFK_Cuda/OFK_OpenMP/OFK_SYCL) is - /// an Offloading action that is optionally passed to a ToolChain (used by - /// CUDA, to specify if it's used in conjunction with OpenMP) + /// Retrieves a ToolChain for a particular \p Target triple for offloading. /// /// Will cache ToolChains for the life of the driver object, and create them /// on-demand. - const ToolChain &getOffloadingDeviceToolChain( - const llvm::opt::ArgList &Args, const llvm::Triple &Target, - const ToolChain &HostTC, - const Action::OffloadKind &TargetDeviceOffloadKind) const; + const ToolChain &getOffloadToolChain(const llvm::opt::ArgList &Args, + const Action::OffloadKind Kind, + const llvm::Triple &Target, + const llvm::Triple &AuxTarget) const; /// Get bitmasks for which option flags to include and exclude based on /// the driver mode. 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d8123cc39fdc95..482d264ef37e2c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -932,7 +932,9 @@ def W_Joined : Joined<["-"], "W">, Group<W_Group>, def Xanalyzer : Separate<["-"], "Xanalyzer">, HelpText<"Pass <arg> to the static analyzer">, MetaVarName<"<arg>">, Group<StaticAnalyzer_Group>; -def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>; +def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>, + HelpText<"Pass <arg> to the compiliation if the target matches <arch>">, + MetaVarName<"<arch> <arg>">; def Xarch_host : Separate<["-"], "Xarch_host">, Flags<[NoXarchOption]>, HelpText<"Pass <arg> to the CUDA/HIP host compilation">, MetaVarName<"<arg>">; def Xarch_device : Separate<["-"], "Xarch_device">, Flags<[NoXarchOption]>, @@ -1115,14 +1117,17 @@ def fno_convergent_functions : Flag<["-"], "fno-convergent-functions">, // Common offloading options let Group = offload_Group in { -def offload_arch_EQ : Joined<["--"], "offload-arch=">, Flags<[NoXarchOption]>, +def offload_targets_EQ : Joined<["--"], "offload-targets=">, + Visibility<[ClangOption, FlangOption]>, Flags<[NoXarchOption]>, + HelpText<"Specify a list of target architectures to use for offloading.">; + +def offload_arch_EQ : Joined<["--"], "offload-arch=">, Visibility<[ClangOption, FlangOption]>, HelpText<"Specify an offloading device architecture for CUDA, HIP, or OpenMP. (e.g. sm_35). " "If 'native' is used the compiler will detect locally installed architectures. " "For HIP offloading, the device architecture can be followed by target ID features " "delimited by a colon (e.g. gfx908:xnack+:sramecc-). May be specified more than once.">; def no_offload_arch_EQ : Joined<["--"], "no-offload-arch=">, - Flags<[NoXarchOption]>, Visibility<[ClangOption, FlangOption]>, HelpText<"Remove CUDA/HIP offloading device architecture (e.g. 
sm_35, gfx906) from the list of devices to compile for. " "'all' resets the list to its default value.">; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 912777a9808b4b..cb0f06b7ed4631 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -879,34 +879,100 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, }) || C.getInputArgs().hasArg(options::OPT_hip_link) || C.getInputArgs().hasArg(options::OPT_hipstdpar); + bool IsOpenMP = + C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, + options::OPT_fno_openmp, false); + bool IsSYCL = C.getInputArgs().hasFlag(options::OPT_fsycl, + options::OPT_fno_sycl, false); + bool UseLLVMOffload = C.getInputArgs().hasArg( options::OPT_foffload_via_llvm, options::OPT_fno_offload_via_llvm, false); + + llvm::DenseSet<Action::OffloadKind> Kinds; + const std::pair<bool, Action::OffloadKind> ActiveKinds[] = { + {IsCuda, Action::OFK_Cuda}, + {IsHIP, Action::OFK_HIP}, + {IsOpenMP, Action::OFK_OpenMP}, + {IsSYCL, Action::OFK_SYCL}}; + for (const auto &[Active, Kind] : ActiveKinds) + if (Active) + Kinds.insert(Kind); + + // Build an offloading toolchain for every requested target and kind. + for (StringRef Targets : + C.getInputArgs().getAllArgValues(options::OPT_offload_targets_EQ)) { + // We currently don't support any kind of mixed offloading. + if (Kinds.size() > 1) { + Diag(clang::diag::err_drv_mix_offload) + << Action::GetOffloadKindName(*Kinds.begin()) + << Action::GetOffloadKindName(*Kinds.end()); + return; + } + + // OpenMP offloading requires a compatible libomp. + if (Kinds.contains(Action::OFK_OpenMP)) { + OpenMPRuntimeKind RuntimeKind = getOpenMPRuntime(C.getInputArgs()); + if (RuntimeKind != OMPRT_OMP && RuntimeKind != OMPRT_IOMP5) { + Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); + return; + } + } + + // Certain options are not allowed when combined with SYCL compilation. 
+ if (Kinds.contains(Action::OFK_SYCL)) { + for (auto ID : + {options::OPT_static_libstdcxx, options::OPT_ffreestanding}) + if (Arg *IncompatArg = C.getInputArgs().getLastArg(ID)) + Diag(clang::diag::err_drv_argument_not_allowed_with) + << IncompatArg->getSpelling() << "-fsycl"; + } + + // Create a device toolchain for every specified triple. + for (StringRef Target : llvm::split(Targets, ",")) { + llvm::Triple TT(Target); + for (Action::OffloadKind Kind : Kinds) { + auto &TC = getOffloadToolChain(C.getInputArgs(), Kind, TT, + C.getDefaultToolChain().getTriple()); + + // Emit a warning if the detected CUDA version is too new. + if (Kind == Action::OFK_Cuda) { + auto &CudaInstallation = + static_cast<const toolchains::CudaToolChain &>(TC) + .CudaInstallation; + if (CudaInstallation.isValid()) + CudaInstallation.WarnIfUnsupportedVersion(); + } + + C.addOffloadDeviceToolChain(&TC, Kind); + } + } + } + + // If the user did not specify the toolchains specifically we will infer them + // based on the usage of `--offloard-arch=`. + if (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ)) + return; + if (IsCuda && IsHIP) { Diag(clang::diag::err_drv_mix_cuda_hip); return; } if (IsCuda && !UseLLVMOffload) { - const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); - const llvm::Triple &HostTriple = HostTC->getTriple(); - auto OFK = Action::OFK_Cuda; - auto CudaTriple = - getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(), HostTriple); + auto CudaTriple = getNVIDIAOffloadTargetTriple( + *this, C.getInputArgs(), C.getDefaultToolChain().getTriple()); if (!CudaTriple) return; - // Use the CUDA and host triples as the key into the ToolChains map, - // because the device toolchain we create depends on both. 
- auto &CudaTC = ToolChains[CudaTriple->str() + "/" + HostTriple.str()]; - if (!CudaTC) { - CudaTC = std::make_unique<toolchains::CudaToolChain>( - *this, *CudaTriple, *HostTC, C.getInputArgs()); - - // Emit a warning if the detected CUDA version is too new. - CudaInstallationDetector &CudaInstallation = - static_cast<toolchains::CudaToolChain &>(*CudaTC).CudaInstallation; - if (CudaInstallation.isValid()) - CudaInstallation.WarnIfUnsupportedVersion(); - } - C.addOffloadDeviceToolChain(CudaTC.get(), OFK); + + auto &TC = + getOffloadToolChain(C.getInputArgs(), Action::OFK_Cuda, *CudaTriple, + C.getDefaultToolChain().getTriple()); + + // Emit a warning if the detected CUDA version is too new. + const CudaInstallationDetector &CudaInstallation = + static_cast<const toolchains::CudaToolChain &>(TC).CudaInstallation; + if (CudaInstallation.isValid()) + CudaInstallation.WarnIfUnsupportedVersion(); + C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda); } else if (IsHIP && !UseLLVMOffload) { if (auto *OMPTargetArg = C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) { @@ -914,14 +980,15 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, << OMPTargetArg->getSpelling() << "HIP"; return; } - const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); - auto OFK = Action::OFK_HIP; + auto HIPTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs()); if (!HIPTriple) return; - auto *HIPTC = &getOffloadingDeviceToolChain(C.getInputArgs(), *HIPTriple, - *HostTC, OFK); - C.addOffloadDeviceToolChain(HIPTC, OFK); + + auto &TC = + getOffloadToolChain(C.getInputArgs(), Action::OFK_HIP, *HIPTriple, + C.getDefaultToolChain().getTriple()); + C.addOffloadDeviceToolChain(&TC, Action::OFK_HIP); } if (IsCuda || IsHIP) @@ -934,10 +1001,8 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // the -fopenmp-targets option or used --offload-arch with OpenMP enabled. 
bool IsOpenMPOffloading = ((IsCuda || IsHIP) && UseLLVMOffload) || - (C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, - options::OPT_fno_openmp, false) && - (C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ) || - C.getInputArgs().hasArg(options::OPT_offload_arch_EQ))); + (IsOpenMP && (C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ) || + C.getInputArgs().hasArg(options::OPT_offload_arch_EQ))); if (IsOpenMPOffloading) { // We expect that -fopenmp-targets is always used in conjunction with the // option -fopenmp specifying a valid runtime with offloading support, i.e. @@ -1038,50 +1103,23 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, FoundNormalizedTriples[NormalizedName] = Val; // If the specified target is invalid, emit a diagnostic. - if (TT.getArch() == llvm::Triple::UnknownArch) + if (TT.getArch() == llvm::Triple::UnknownArch) { Diag(clang::diag::err_drv_invalid_omp_target) << Val; - else { - const ToolChain *TC; - // Device toolchains have to be selected differently. They pair host - // and device in their implementation. 
- if (TT.isNVPTX() || TT.isAMDGCN() || TT.isSPIRV()) { - const ToolChain *HostTC = - C.getSingleOffloadToolChain<Action::OFK_Host>(); - assert(HostTC && "Host toolchain should be always defined."); - auto &DeviceTC = - ToolChains[TT.str() + "/" + HostTC->getTriple().normalize()]; - if (!DeviceTC) { - if (TT.isNVPTX()) - DeviceTC = std::make_unique<toolchains::CudaToolChain>( - *this, TT, *HostTC, C.getInputArgs()); - else if (TT.isAMDGCN()) - DeviceTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>( - *this, TT, *HostTC, C.getInputArgs()); - else if (TT.isSPIRV()) - DeviceTC = std::make_unique<toolchains::SPIRVOpenMPToolChain>( - *this, TT, *HostTC, C.getInputArgs()); - else - assert(DeviceTC && "Device toolchain not defined."); - } - - TC = DeviceTC.get(); - } else - TC = &getToolChain(C.getInputArgs(), TT); - C.addOffloadDeviceToolChain(TC, Action::OFK_OpenMP); - auto It = DerivedArchs.find(TT.getTriple()); - if (It != DerivedArchs.end()) - KnownArchs[TC] = It->second; + continue; } + + auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT, + C.getDefaultToolChain().getTriple()); + C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP); + auto It = DerivedArchs.find(TT.getTriple()); + if (It != DerivedArchs.end()) + KnownArchs[&TC] = It->second; } } else if (C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ)) { Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); return; } - // We need to generate a SYCL toolchain if the user specified -fsycl. - bool IsSYCL = C.getInputArgs().hasFlag(options::OPT_fsycl, - options::OPT_fno_sycl, false); - auto argSYCLIncompatible = [&](OptSpecifier OptId) { if (!IsSYCL) return; @@ -1103,9 +1141,9 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // getOffloadingDeviceToolChain, because the device toolchains we're // going to create will depend on both. 
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); - for (const auto &TargetTriple : UniqueSYCLTriplesVec) { - auto SYCLTC = &getOffloadingDeviceToolChain( - C.getInputArgs(), TargetTriple, *HostTC, Action::OFK_SYCL); + for (const auto &TT : UniqueSYCLTriplesVec) { + auto SYCLTC = &getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, + HostTC->getTriple()); C.addOffloadDeviceToolChain(SYCLTC, Action::OFK_SYCL); } } @@ -3409,7 +3447,8 @@ class OffloadingActionBuilder final { // Collect all offload arch parameters, removing duplicates. std::set<StringRef> GpuArchs; bool Error = false; - for (Arg *A : Args) { + const ToolChain &TC = *ToolChains.front(); + for (Arg *A : C.getArgsForToolChain(&TC, "", AssociatedOffloadKind)) { if (!(A->getOption().matches(options::OPT_offload_arch_EQ) || A->getOption().matches(options::OPT_no_offload_arch_EQ))) continue; @@ -3420,7 +3459,6 @@ class OffloadingActionBuilder final { ArchStr == "all") { GpuArchs.clear(); } else if (ArchStr == "native") { - const ToolChain &TC = *ToolChains.front(); auto GPUsOrErr = ToolChains.front()->getSystemGPUArchs(Args); if (!GPUsOrErr) { TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) @@ -6604,6 +6642,72 @@ std::string Driver::GetClPchPath(Compilation &C, StringRef BaseName) const { return std::string(Output); } +const ToolChain &Driver::getOffloadToolChain( + const llvm::opt::ArgList &Args, const Action::OffloadKind Kind, + const llvm::Triple &Target, const llvm::Triple &AuxTarget) const { + auto &TC = ToolChains[Target.str() + "/" + AuxTarget.str()]; + auto &HostTC = ToolChains[AuxTarget.str()]; + + assert(HostTC && "Host toolchain for offloading doesn't exit?"); + if (!TC) { + // Detect the toolchain based off of the target operating system. 
+ switch (Target.getOS()) { + case llvm::Triple::CUDA: + TC = std::make_unique<toolchains::CudaToolChain>(*this, Target, *HostTC, + Args); + break; + case llvm::Triple::AMDHSA: + if (Kind == Action::OFK_HIP) + TC = std::make_unique<toolchains::HIPAMDToolChain>(*this, Target, + *HostTC, Args); + else if (Kind == Action::OFK_OpenMP) + TC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(*this, Target, + *HostTC, Args); + break; + default: + break; + } + } + if (!TC) { + // Detect the toolchain based off of the target architecture if that failed. + switch (Target.getArch()) { + case llvm::Triple::spir: + case llvm::Triple::spir64: + case llvm::Triple::spirv: + case llvm::Triple::spirv32: + case llvm::Triple::spirv64: + switch (Kind) { + case Action::OFK_SYCL: + TC = std::make_unique<toolchains::SYCLToolChain>(*this, Target, *HostTC, + Args); + break; + case Action::OFK_HIP: + TC = std::make_unique<toolchains::HIPSPVToolChain>(*this, Target, + *HostTC, Args); + break; + case Action::OFK_OpenMP: + TC = std::make_unique<toolchains::SPIRVOpenMPToolChain>(*this, Target, + *HostTC, Args); + break; + case Action::OFK_Cuda: + TC = std::make_unique<toolchains::CudaToolChain>(*this, Target, *HostTC, + Args); + break; + default: + break; + } + break; + default: + break; + } + } + + // If all else fails, just look up the normal toolchain for the target. + if (!TC) + return getToolChain(Args, Target); + return *TC; +} + const ToolChain &Driver::getToolChain(const ArgList &Args, const llvm::Triple &Target) const { @@ -6797,45 +6901,6 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, return *TC; } -const ToolChain &Driver::getOffloadingDeviceToolChain( - const ArgList &Args, const llvm::Triple &Target, const ToolChain &HostTC, - const Action::OffloadKind &TargetDeviceOffloadKind) const { - // Use device / host triples as the key into the ToolChains map because the - // device ToolChain we create depends on both. 
- auto &TC = ToolChains[Target.str() + "/" + HostTC.getTriple().str()]; - if (!TC) { - // Categorized by offload kind > arch rather than OS > arch like - // the normal getToolChain call, as it seems a reasonable way to categorize - // things. - switch (TargetDeviceOffloadKind) { - case Action::OFK_HIP: { - if (((Target.getArch() == llvm::Triple::amdgcn || - Target.getArch() == llvm::Triple::spirv64) && - Target.getVendor() == llvm::Triple::AMD && - Target.getOS() == llvm::Triple::AMDHSA) || - !Args.hasArgNoClaim(options::OPT_offload_EQ)) - TC = std::make_unique<toolchains::HIPAMDToolChain>(*this, Target, - HostTC, Args); - else if (Target.getArch() == llvm::Triple::spirv64 && - Target.getVendor() == llvm::Triple::UnknownVendor && - Target.getOS() == llvm::Triple::UnknownOS) - TC = std::make_unique<toolchains::HIPSPVToolChain>(*this, Target, - HostTC, Args); - break; - } - case Action::OFK_SYCL: - if (Target.isSPIROrSPIRV()) - TC = std::make_unique<toolchains::SYCLToolChain>(*this, Target, HostTC, - Args); - break; - default: - break; - } - } - assert(TC && "Could n... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/125556 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits