llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-driver Author: Johannes Doerfert (jdoerfert) <details> <summary>Changes</summary> This provides the header overlay for cuda_runtime.h which is found before any CUDA installation (none is necessary). Some basic APIs are defined in terms of the omp_target_* ones, but with the pending LLVM/Offload API redesign the requirements of CUDA should be taken into account. Note: Async is not exposed by the existing runtime thus the streams are ignored. I'll address this in a follow up. --- Patch is 45.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/94821.diff 28 Files Affected: - (modified) clang/include/clang/Basic/LangOptions.def (+1) - (modified) clang/include/clang/Driver/Options.td (+6) - (modified) clang/lib/CodeGen/CGCUDANV.cpp (+62-15) - (modified) clang/lib/Driver/Driver.cpp (+12-7) - (modified) clang/lib/Driver/ToolChains/Clang.cpp (+26-4) - (modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+6-1) - (modified) clang/lib/Headers/CMakeLists.txt (+16-3) - (added) clang/lib/Headers/llvm_offload_wrappers/__llvm_offload.h (+31) - (added) clang/lib/Headers/llvm_offload_wrappers/__llvm_offload_device.h (+10) - (added) clang/lib/Headers/llvm_offload_wrappers/__llvm_offload_host.h (+15) - (added) clang/lib/Headers/llvm_offload_wrappers/cuda_runtime.h (+131) - (modified) clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h (+2-4) - (modified) clang/lib/Sema/SemaCUDA.cpp (+3) - (added) clang/test/Driver/cuda-via-liboffload.cu (+23) - (modified) offload/include/Shared/APITypes.h (+19-14) - (modified) offload/include/omptarget.h (+1-1) - (modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+8-4) - (modified) offload/plugins-nextgen/common/src/PluginInterface.cpp (+11-3) - (modified) offload/src/CMakeLists.txt (+1) - (added) offload/src/KernelLanguage/API.cpp (+76) - (modified) offload/src/exports (+3) - (modified) offload/test/lit.cfg (+1-1) - (added) offload/test/offloading/CUDA/basic_api_malloc_free.cu (+41) - (added) offload/test/offloading/CUDA/basic_api_memcpy.cu (+46) - (added) offload/test/offloading/CUDA/basic_api_memset.cu (+43) - (added) offload/test/offloading/CUDA/basic_launch.cu (+29) - (added) offload/test/offloading/CUDA/basic_launch_blocks_and_threads.cu (+30) - (added) offload/test/offloading/CUDA/basic_launch_multi_arg.cu (+37) ``````````diff diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 4061451b2150a..8aff98867202e 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -288,6 +288,7 @@ LANGOPT(GPUMaxThreadsPerBlock, 32, 1024, "default max threads per block for kern LANGOPT(GPUDeferDiag, 1, 0, "defer host/device related diagnostic messages for CUDA/HIP") LANGOPT(GPUExcludeWrongSideOverloads, 1, 0, "always exclude wrong side overloads in overloading resolution for CUDA/HIP") LANGOPT(OffloadingNewDriver, 1, 0, "use the new driver for generating offloading code.") +LANGOPT(OffloadViaLLVM, 1, 0, "target LLVM/Offload as portable offloading runtime.") LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device") LANGOPT(SYCLIsHost , 1, 0, "SYCL host compilation") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 57f37c5023110..a09d75917ff98 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1275,6 +1275,12 @@ def no_offload_compress : Flag<["--"], "no-offload-compress">; def offload_compression_level_EQ : Joined<["--"], "offload-compression-level=">, Flags<[HelpHidden]>, HelpText<"Compression level for offload device binaries (HIP only)">; + +defm offload_via_llvm : BoolFOption<"offload-via-llvm", + LangOpts<"OffloadViaLLVM">, DefaultFalse, + PosFlag<SetTrue, [], [ClangOption, CC1Option], "Use">, + NegFlag<SetFalse, [], [ClangOption], "Don't use">, + BothFlags<[], [ClangOption], " LLVM/Offload as portable offloading runtime.">>; } // CUDA options diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index 43dfbbb90dd52..8e32aad88a26d 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -15,10 +15,12 @@ #include "CGCXXABI.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "clang/AST/CharUnits.h" #include "clang/AST/Decl.h" #include "clang/Basic/Cuda.h" #include "clang/CodeGen/CodeGenABITypes.h" #include "clang/CodeGen/ConstantInitBuilder.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Frontend/Offloading/Utility.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -36,6 +38,11 @@ constexpr unsigned HIPFatMagic = 0x48495046; // "HIPF" class CGNVCUDARuntime : public CGCUDARuntime { + /// The prefix used for function calls and section names (CUDA, HIP, LLVM) + StringRef Prefix; + /// TODO: We should transition the OpenMP section to LLVM/Offload + StringRef SectionPrefix; + private: llvm::IntegerType *IntTy, *SizeTy; llvm::Type *VoidTy; @@ -132,6 +139,9 @@ class CGNVCUDARuntime : public CGCUDARuntime { return DummyFunc; } + Address prepareKernelArgs(CodeGenFunction &CGF, FunctionArgList &Args); + Address prepareKernelArgsLLVMOffload(CodeGenFunction &CGF, + FunctionArgList &Args); void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args); void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args); std::string getDeviceSideName(const NamedDecl *ND) override; @@ -191,15 +201,11 @@ class CGNVCUDARuntime : public CGCUDARuntime { } // end anonymous namespace std::string CGNVCUDARuntime::addPrefixToName(StringRef FuncName) const { - if (CGM.getLangOpts().HIP) - return ((Twine("hip") + Twine(FuncName)).str()); - return ((Twine("cuda") + Twine(FuncName)).str()); + return (Prefix + FuncName).str(); } std::string CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const { - if (CGM.getLangOpts().HIP) - return ((Twine("__hip") + Twine(FuncName)).str()); - return ((Twine("__cuda") + Twine(FuncName)).str()); + return ("__" + Prefix + FuncName).str(); } static std::unique_ptr<MangleContext> InitDeviceMC(CodeGenModule &CGM) { @@ -227,6 +233,14 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) SizeTy = CGM.SizeTy; VoidTy = CGM.VoidTy; PtrTy = CGM.UnqualPtrTy; + + if (CGM.getLangOpts().OffloadViaLLVM) { + Prefix = "llvm"; + SectionPrefix = "omp"; + } else if (CGM.getLangOpts().HIP) + SectionPrefix = Prefix = "hip"; + else + SectionPrefix = Prefix = "cuda"; } llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const { @@ -305,18 +319,37 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, } if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), CudaFeature::CUDA_USES_NEW_LAUNCH) || - (CGF.getLangOpts().HIP && CGF.getLangOpts().HIPUseNewLaunchAPI)) + (CGF.getLangOpts().HIP && CGF.getLangOpts().HIPUseNewLaunchAPI) || + (CGF.getLangOpts().OffloadViaLLVM)) emitDeviceStubBodyNew(CGF, Args); else emitDeviceStubBodyLegacy(CGF, Args); } -// CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local -// array and kernels are launched using cudaLaunchKernel(). -void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, - FunctionArgList &Args) { - // Build the shadow stack entry at the very start of the function. +/// CUDA passes the arguments with a level of indirection. For example, a +/// (void*, short, void*) is passed as {void **, short *, void **} to the launch +/// function. For the LLVM/offload launch we flatten the arguments into the +/// struct directly, thus pass {void *, short, void *} +Address CGNVCUDARuntime::prepareKernelArgsLLVMOffload(CodeGenFunction &CGF, + FunctionArgList &Args) { + SmallVector<llvm::Type *> ArgTypes; + for (auto &Arg : Args) + ArgTypes.push_back(CGF.ConvertTypeForMem(Arg->getType())); + + llvm::StructType *ST = llvm::StructType::create(ArgTypes); + Address KernelArgs = CGF.CreateTempAllocaWithoutCast( + ST, CharUnits::fromQuantity(16), "kernel_args"); + + for (unsigned i = 0; i < Args.size(); ++i) { + auto *ArgVal = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(Args[i])); + CGF.Builder.CreateStore(ArgVal, CGF.Builder.CreateStructGEP(KernelArgs, i)); + } + return KernelArgs; +} + +Address CGNVCUDARuntime::prepareKernelArgs(CodeGenFunction &CGF, + FunctionArgList &Args) { // Calculate amount of space we will need for all arguments. If we have no // args, allocate a single pointer so we still have a valid pointer to the // argument array that we can pass to runtime, even if it will be unused. @@ -331,6 +364,17 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, VoidVarPtr, CGF.Builder.CreateConstGEP1_32( PtrTy, KernelArgs.emitRawPointer(CGF), i)); } + return KernelArgs; +} + +// CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local +// array and kernels are launched using cudaLaunchKernel(). +void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, + FunctionArgList &Args) { + // Build the shadow stack entry at the very start of the function. + Address KernelArgs = CGF.getLangOpts().OffloadViaLLVM + ? prepareKernelArgsLLVMOffload(CGF, Args) + : prepareKernelArgs(CGF, Args); llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); @@ -1129,8 +1173,9 @@ void CGNVCUDARuntime::transformManagedVars() { // registered. The linker will provide a pointer to this section so we can // register the symbols with the linked device image. void CGNVCUDARuntime::createOffloadingEntries() { - StringRef Section = CGM.getLangOpts().HIP ? "hip_offloading_entries" - : "cuda_offloading_entries"; + SmallVector<char, 32> Out; + StringRef Section = (SectionPrefix + "_offloading_entries").toStringRef(Out); + llvm::Module &M = CGM.getModule(); for (KernelInfo &I : EmittedKernels) llvm::offloading::emitOffloadingEntry( @@ -1199,7 +1244,9 @@ llvm::Function *CGNVCUDARuntime::finalizeModule() { } return nullptr; } - if (CGM.getLangOpts().OffloadingNewDriver && RelocatableDeviceCode) + if (CGM.getLangOpts().OffloadViaLLVM) + createOffloadingEntries(); + else if (CGM.getLangOpts().OffloadingNewDriver && RelocatableDeviceCode) createOffloadingEntries(); else return makeModuleCtorFunction(); diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index f5ea73a04ae5c..815149a49d018 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -792,11 +792,13 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, }) || C.getInputArgs().hasArg(options::OPT_hip_link) || C.getInputArgs().hasArg(options::OPT_hipstdpar); + bool UseLLVMOffload = C.getInputArgs().hasArg( + options::OPT_foffload_via_llvm, options::OPT_fno_offload_via_llvm, false); if (IsCuda && IsHIP) { Diag(clang::diag::err_drv_mix_cuda_hip); return; } - if (IsCuda) { + if (IsCuda && !UseLLVMOffload) { const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); const llvm::Triple &HostTriple = HostTC->getTriple(); auto OFK = Action::OFK_Cuda; @@ -818,7 +820,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, CudaInstallation.WarnIfUnsupportedVersion(); } C.addOffloadDeviceToolChain(CudaTC.get(), OFK); - } else if (IsHIP) { + } else if (IsHIP && !UseLLVMOffload) { if (auto *OMPTargetArg = C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) { Diag(clang::diag::err_drv_unsupported_opt_for_language_mode) @@ -842,10 +844,11 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // We need to generate an OpenMP toolchain if the user specified targets with // the -fopenmp-targets option or used --offload-arch with OpenMP enabled. bool IsOpenMPOffloading = - C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, - options::OPT_fno_openmp, false) && - (C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ) || - C.getInputArgs().hasArg(options::OPT_offload_arch_EQ)); + ((IsCuda || IsHIP) && UseLLVMOffload) || + (C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, + options::OPT_fno_openmp, false) && + (C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ) || + C.getInputArgs().hasArg(options::OPT_offload_arch_EQ))); if (IsOpenMPOffloading) { // We expect that -fopenmp-targets is always used in conjunction with the // option -fopenmp specifying a valid runtime with offloading support, i.e. @@ -873,7 +876,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, for (StringRef T : OpenMPTargets->getValues()) OpenMPTriples.insert(T); } else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && - !IsHIP && !IsCuda) { + ((!IsHIP && !IsCuda) || UseLLVMOffload)) { const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs()); auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(), @@ -4138,6 +4141,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, bool UseNewOffloadingDriver = C.isOffloadingHostKind(Action::OFK_OpenMP) || + Args.hasFlag(options::OPT_foffload_via_llvm, + options::OPT_fno_offload_via_llvm, false) || Args.hasFlag(options::OPT_offload_new_driver, options::OPT_no_offload_new_driver, false); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 4e1c52462e584..e9589a691c8dc 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1125,6 +1125,22 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, CmdArgs.push_back("__clang_openmp_device_functions.h"); } + if (Args.hasArg(options::OPT_foffload_via_llvm)) { + // Add llvm_wrappers/* to our system include path. This lets us wrap + // standard library headers and other headers. + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "include"); + llvm::sys::path::append(P, "llvm_offload_wrappers"); + CmdArgs.push_back("-internal-isystem"); + CmdArgs.push_back(Args.MakeArgString(P)); + + CmdArgs.push_back("-include"); + if (JA.isDeviceOffloading(Action::OFK_OpenMP)) + CmdArgs.push_back("__llvm_offload_device.h"); + else + CmdArgs.push_back("__llvm_offload_host.h"); + } + // Add -i* options, and automatically translate to // -include-pch/-include-pth for transparent PCH support. It's // wonky, but we include looking for .gch so we can support seamless @@ -6672,11 +6688,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.addOptOutFlag(CmdArgs, options::OPT_fopenmp_extensions, options::OPT_fno_openmp_extensions); } - - // Forward the new driver to change offloading code generation. - if (Args.hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false)) + // Forward the offload runtime change to code generation, liboffload implies + // new driver. Otherwise, check if we should forward the new driver to change + // offloading code generation. + if (Args.hasFlag(options::OPT_foffload_via_llvm, + options::OPT_fno_offload_via_llvm, false)) { CmdArgs.push_back("--offload-new-driver"); + CmdArgs.push_back("-foffload-via-llvm"); + } else if (Args.hasFlag(options::OPT_offload_new_driver, + options::OPT_no_offload_new_driver, false)) { + CmdArgs.push_back("--offload-new-driver"); + } SanitizeArgs.addArgs(TC, Args, CmdArgs, InputType); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 71e993119436a..74a69f65f7ad5 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1144,8 +1144,13 @@ bool tools::addOpenMPRuntime(const Compilation &C, ArgStringList &CmdArgs, bool ForceStaticHostRuntime, bool IsOffloadingHost, bool GompNeedsRT) { if (!Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, - options::OPT_fno_openmp, false)) + options::OPT_fno_openmp, false)) { + // We need libomptarget (liboffload) if it's the choosen offloading runtime. + if (Args.hasFlag(options::OPT_foffload_via_llvm, + options::OPT_fno_offload_via_llvm, false)) + CmdArgs.push_back("-lomptarget"); return false; + } Driver::OpenMPRuntimeKind RTKind = TC.getDriver().getOpenMPRuntime(Args); diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index d3090e488306f..251e5b0ba2381 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -325,6 +325,13 @@ set(openmp_wrapper_files openmp_wrappers/new ) +set(llvm_offload_wrapper_files + llvm_offload_wrappers/__llvm_offload.h + llvm_offload_wrappers/__llvm_offload_host.h + llvm_offload_wrappers/__llvm_offload_device.h + llvm_offload_wrappers/cuda_runtime.h +) + set(llvm_libc_wrapper_files llvm_libc_wrappers/assert.h llvm_libc_wrappers/stdio.h @@ -375,7 +382,7 @@ endfunction(clang_generate_header) # Copy header files from the source directory to the build directory foreach( f ${files} ${cuda_wrapper_files} ${cuda_wrapper_bits_files} ${ppc_wrapper_files} ${openmp_wrapper_files} ${zos_wrapper_files} ${hlsl_files} - ${llvm_libc_wrapper_files}) + ${llvm_libc_wrapper_files} ${llvm_offload_wrapper_files}) copy_header_to_output_dir(${CMAKE_CURRENT_SOURCE_DIR} ${f}) endforeach( f ) @@ -501,6 +508,7 @@ add_header_target("hlsl-resource-headers" ${hlsl_files}) add_header_target("opencl-resource-headers" ${opencl_files}) add_header_target("llvm-libc-resource-headers" ${llvm_libc_wrapper_files}) add_header_target("openmp-resource-headers" ${openmp_wrapper_files}) +add_header_target("llvm-offload-resource-headers" ${llvm_libc_wrapper_files}) add_header_target("windows-resource-headers" ${windows_only_files}) add_header_target("utility-resource-headers" ${utility_files}) @@ -542,6 +550,11 @@ install( DESTINATION ${header_install_dir}/openmp_wrappers COMPONENT clang-resource-headers) +install( + FILES ${llvm_offload_wrapper_files} + DESTINATION ${header_install_dir}/llvm_offload_wrappers + COMPONENT clang-resource-headers) + install( FILES ${zos_wrapper_files} DESTINATION ${header_install_dir}/zos_wrappers @@ -704,8 +717,8 @@ install( COMPONENT openmp-resource-headers) install( - FILES ${openmp_wrapper_files} - DESTINATION ${header_install_dir}/openmp_wrappers + FILES ${llvm_offload_wrapper_files} + DESTINATION ${header_install_dir}/llvm_offload_wrappers EXCLUDE_FROM_ALL COMPONENT openmp-resource-headers) diff --git a/clang/lib/Headers/llvm_offload_wrappers/__llvm_offload.h b/clang/lib/Headers/llvm_offload_wrappers/__llvm_offload.h new file mode 100644 index 0000000000000..d78e3b41f99a5 --- /dev/null +++ b/clang/lib/Headers/llvm_offload_wrappers/__llvm_offload.h @@ -0,0 +1,31 @@ +/*===------ LLVM/Offload helpers for kernel languages (CUDA/HIP) -*- c++ -*-=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#include <stdlib.h> + +#define __host__ __attribute__((host)) +#define __device__ __attribute__((device)) +#define __global__ __attribute__((global)) +#define __shared__ __attribute__((shared)) +#define __constant__ __attribute__((constant)) +#define __managed__ __attribute__((managed)) + +extern "C" { + +typedef struct dim3 { + dim3() {} + dim3(unsigned x) : x(x) {} + unsigned x = 0, y = 0, z = 0; +} dim3; + +// TODO: For some reason the CUDA device compilation requires this declaration +// to be present but it should not. +unsigned __llvmPushCallConfiguration(dim3 gridDim, dim3 blockDim, + size_t sharedMem = 0, void *stream = 0); +} diff --git a/clang/lib/Headers/llvm_offload_wrappers/__llvm_offload_device.h b/clang/lib/Headers/llvm_offload_wrappers/__llvm_offload_device.h new file mode 100644 index 0000000000000..1a813b331515b --- /dev/null +++ b/clang/lib/Headers/llvm_offload_wrappers/__llvm_offload_device.h @@ -0,0 +1,10 @@ +/*===------ LLVM/Offload helpers for kernel languages (CUDA/HIP) -*- c++ -*-=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#include "__llvm_offload.h" diff --git a/clang/lib/Headers/llvm_offload_wrappers/__llvm_offload_host.h b/clang/lib/Headers/llvm_offload_wrappers/__llvm_offload_host.h new file mode 100644 index 0000000000000..160289d169b55 --- /dev/null +++ b/clang/lib/Headers/llvm_offload_wrappers/__llvm_offload_host.h @@ -0,0 +1,15 @@ +/*===------ LLVM/Offload helpers for kernel languages (CUDA/HIP) -*- c++ -*-=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#include "__llvm_offload.h" + +extern "C" { +unsigned llvmLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, + void **args, size_t sharedMem = 0, void *stream = 0); +} diff --git a/clang/lib/Headers/llvm_offload_wrappers/cuda_runtime.h b/clang/lib/Headers/llvm_offload_wrappers/cuda_runtime.h new file mode 100644 index 0000000000000..8718e462a82d3 --- /dev/null +++ b/clang/lib/Headers/llvm_offload_wrappers/cuda_runtime.h @@ -0,0 +1,131 @@ +/*===- __cuda_... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/94821 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits