r340056 - [HIP] Make __hip_gpubin_handle hidden to avoid being merged across different shared libraries
Author: yaxunl Date: Fri Aug 17 10:47:31 2018 New Revision: 340056 URL: http://llvm.org/viewvc/llvm-project?rev=340056&view=rev Log: [HIP] Make __hip_gpubin_handle hidden to avoid being merged across different shared libraries Different shared libraries contain different fat binaries, each of which is stored in a global variable __hip_gpubin_handle. Since different compilation units share the same fat binary, this variable has linkonce linkage. However, it should not be merged across different shared libraries. This patch sets the visibility of the global variable to hidden, which makes it invisible outside the shared library, thereby preventing it from being merged. Differential Revision: https://reviews.llvm.org/D50596 Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp cfe/trunk/test/CodeGenCUDA/device-stub.cu Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=340056&r1=340055&r2=340056&view=diff == --- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Fri Aug 17 10:47:31 2018 @@ -459,6 +459,8 @@ llvm::Function *CGNVCUDARuntime::makeMod /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__hip_gpubin_handle"); GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity()); +// Prevent the weak symbol in different shared libraries being merged. 
+GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility); Address GpuBinaryAddr( GpuBinaryHandle, CharUnits::fromQuantity(GpuBinaryHandle->getAlignment())); Modified: cfe/trunk/test/CodeGenCUDA/device-stub.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-stub.cu?rev=340056&r1=340055&r2=340056&view=diff == --- cfe/trunk/test/CodeGenCUDA/device-stub.cu (original) +++ cfe/trunk/test/CodeGenCUDA/device-stub.cu Fri Aug 17 10:47:31 2018 @@ -80,7 +80,7 @@ void use_pointers() { // HIP-SAME: section ".hipFatBinSegment" // * variable to save GPU binary handle after initialization // CUDANORDC: @__[[PREFIX]]_gpubin_handle = internal global i8** null -// HIP: @__[[PREFIX]]_gpubin_handle = linkonce global i8** null +// HIP: @__[[PREFIX]]_gpubin_handle = linkonce hidden global i8** null // * constant unnamed string with NVModuleID // RDC: [[MODULE_ID_GLOBAL:@.*]] = private constant // CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r331811 - [HIP] Add hip offload kind
Author: yaxunl Date: Tue May 8 14:02:12 2018 New Revision: 331811 URL: http://llvm.org/viewvc/llvm-project?rev=331811&view=rev Log: [HIP] Add hip offload kind There are quite a few differences in the HIP action builder and action job creation, which justifies defining a separate offload kind. Differential Revision: https://reviews.llvm.org/D46471 Modified: cfe/trunk/include/clang/Driver/Action.h cfe/trunk/lib/Driver/Action.cpp cfe/trunk/lib/Driver/Compilation.cpp cfe/trunk/lib/Driver/ToolChains/Clang.cpp Modified: cfe/trunk/include/clang/Driver/Action.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Action.h?rev=331811&r1=331810&r2=331811&view=diff == --- cfe/trunk/include/clang/Driver/Action.h (original) +++ cfe/trunk/include/clang/Driver/Action.h Tue May 8 14:02:12 2018 @@ -88,6 +88,7 @@ public: // The device offloading tool chains - one bit for each programming model. OFK_Cuda = 0x02, OFK_OpenMP = 0x04, +OFK_HIP = 0x08, }; static const char *getClassName(ActionClass AC); Modified: cfe/trunk/lib/Driver/Action.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Action.cpp?rev=331811&r1=331810&r2=331811&view=diff == --- cfe/trunk/lib/Driver/Action.cpp (original) +++ cfe/trunk/lib/Driver/Action.cpp Tue May 8 14:02:12 2018 @@ -96,6 +96,8 @@ std::string Action::getOffloadingKindPre return "device-cuda"; case OFK_OpenMP: return "device-openmp"; + case OFK_HIP: +return "device-hip"; // TODO: Add other programming models here. 
} @@ -104,8 +106,13 @@ std::string Action::getOffloadingKindPre return {}; std::string Res("host"); + assert(!((ActiveOffloadKindMask & OFK_Cuda) && + (ActiveOffloadKindMask & OFK_HIP)) && + "Cannot offload CUDA and HIP at the same time"); if (ActiveOffloadKindMask & OFK_Cuda) Res += "-cuda"; + if (ActiveOffloadKindMask & OFK_HIP) +Res += "-hip"; if (ActiveOffloadKindMask & OFK_OpenMP) Res += "-openmp"; @@ -142,6 +149,8 @@ StringRef Action::GetOffloadKindName(Off return "cuda"; case OFK_OpenMP: return "openmp"; + case OFK_HIP: +return "hip"; // TODO: Add other programming models here. } Modified: cfe/trunk/lib/Driver/Compilation.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Compilation.cpp?rev=331811&r1=331810&r2=331811&view=diff == --- cfe/trunk/lib/Driver/Compilation.cpp (original) +++ cfe/trunk/lib/Driver/Compilation.cpp Tue May 8 14:02:12 2018 @@ -196,10 +196,10 @@ static bool ActionFailed(const Action *A if (FailingCommands.empty()) return false; - // CUDA can have the same input source code compiled multiple times so do not - // compiled again if there are already failures. It is OK to abort the CUDA - // pipeline on errors. - if (A->isOffloading(Action::OFK_Cuda)) + // CUDA/HIP can have the same input source code compiled multiple times so do + // not compiled again if there are already failures. It is OK to abort the + // CUDA pipeline on errors. 
+ if (A->isOffloading(Action::OFK_Cuda) || A->isOffloading(Action::OFK_HIP)) return true; for (const auto &CI : FailingCommands) Modified: cfe/trunk/lib/Driver/ToolChains/Clang.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Clang.cpp?rev=331811&r1=331810&r2=331811&view=diff == --- cfe/trunk/lib/Driver/ToolChains/Clang.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/Clang.cpp Tue May 8 14:02:12 2018 @@ -131,6 +131,10 @@ forAllAssociatedToolChains(Compilation & Work(*C.getSingleOffloadToolChain()); else if (JA.isDeviceOffloading(Action::OFK_Cuda)) Work(*C.getSingleOffloadToolChain()); + else if (JA.isHostOffloading(Action::OFK_HIP)) +Work(*C.getSingleOffloadToolChain()); + else if (JA.isDeviceOffloading(Action::OFK_HIP)) +Work(*C.getSingleOffloadToolChain()); if (JA.isHostOffloading(Action::OFK_OpenMP)) { auto TCs = C.getOffloadToolChains(); @@ -3105,13 +3109,14 @@ void Clang::ConstructJob(Compilation &C, // Check number of inputs for sanity. We need at least one input. assert(Inputs.size() >= 1 && "Must have at least one input."); const InputInfo &Input = Inputs[0]; - // CUDA compilation may have multiple inputs (source file + results of + // CUDA/HIP compilation may have multiple inputs (source file + results of // device-side compilations). OpenMP device jobs also take the host IR as a // second input. All other jobs are expected to have exactly one // input. bool IsCuda = JA.isOffloading(Action::OFK_Cuda); + bool IsHIP = JA.isOffloading(Action::OFK_HIP); bool IsOpenMPDevice = JA.isDeviceOffloading(Action::O
r331895 - [OpenCL] Fix typos in emitted enqueue kernel function names
Author: yaxunl Date: Wed May 9 10:07:06 2018 New Revision: 331895 URL: http://llvm.org/viewvc/llvm-project?rev=331895&view=rev Log: [OpenCL] Fix typos in emitted enqueue kernel function names Two typos: vaarg => vararg get_kernel_preferred_work_group_multiple => get_kernel_preferred_work_group_size_multiple Differential Revision: https://reviews.llvm.org/D46601 Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=331895&r1=331894&r2=331895&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed May 9 10:07:06 2018 @@ -3164,10 +3164,10 @@ RValue CodeGenFunction::EmitBuiltinExpr( return Ptr; }; -// Could have events and/or vaargs. +// Could have events and/or varargs. if (E->getArg(3)->getType()->isBlockPointerType()) { // No events passed, but has variadic arguments. - Name = "__enqueue_kernel_vaargs"; + Name = "__enqueue_kernel_varargs"; auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); llvm::Value *Kernel = @@ -3235,7 +3235,7 @@ RValue CodeGenFunction::EmitBuiltinExpr( // Pass the number of variadics to the runtime function too. 
Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); ArgTys.push_back(Int32Ty); - Name = "__enqueue_kernel_events_vaargs"; + Name = "__enqueue_kernel_events_varargs"; auto *PtrToSizeArray = CreateArrayForSizeVar(7); Args.push_back(PtrToSizeArray); @@ -3276,7 +3276,7 @@ RValue CodeGenFunction::EmitBuiltinExpr( CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, false), -"__get_kernel_preferred_work_group_multiple_impl"), +"__get_kernel_preferred_work_group_size_multiple_impl"), {Kernel, Arg})); } case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: Modified: cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl?rev=331895&r1=331894&r2=331895&view=diff == --- cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl Wed May 9 10:07:06 2018 @@ -88,7 +88,7 @@ kernel void device_side_enqueue(global i // B64: %[[TMP:.*]] = alloca [1 x i64] // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0 // B64: store i64 256, i64* %[[TMP1]], align 8 - // COMMON-LABEL: call i32 @__enqueue_kernel_vaargs( + // COMMON-LABEL: call i32 @__enqueue_kernel_varargs( // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, @@ -109,7 +109,7 @@ kernel void device_side_enqueue(global i // B64: %[[TMP:.*]] = alloca [1 x i64] // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0 // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8 - // COMMON-LABEL: call i32 @__enqueue_kernel_vaargs( + // 
COMMON-LABEL: call i32 @__enqueue_kernel_varargs( // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, @@ -133,7 +133,7 @@ kernel void device_side_enqueue(global i // B64: %[[TMP:.*]] = alloca [1 x i64] // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0 // B64: store i64 256, i64* %[[TMP1]], align 8 - // COMMON-LABEL: call i32 @__enqueue_kernel_events_vaargs + // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to i8 addrspace(4)*),
r332121 - [HIP] Let clang-offload-bundler support HIP
Author: yaxunl Date: Fri May 11 12:02:18 2018 New Revision: 332121 URL: http://llvm.org/viewvc/llvm-project?rev=332121&view=rev Log: [HIP] Let clang-offload-bundler support HIP When bundling/unbundling intermediate files for HIP, there may be multiple sub-archs; therefore BoundArch needs to be included in the target and output file names for clang-offload-bundler. Differential Revision: https://reviews.llvm.org/D46473 Modified: cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/lib/Driver/ToolChains/Clang.cpp cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=332121&r1=332120&r2=332121&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Fri May 11 12:02:18 2018 @@ -3736,9 +3736,12 @@ InputInfo Driver::BuildJobsForActionNoCa UI.DependentToolChain->getTriple().normalize(), /*CreatePrefixForHost=*/true); auto CurI = InputInfo( - UA, GetNamedOutputPath(C, *UA, BaseInput, UI.DependentBoundArch, - /*AtTopLevel=*/false, MultipleArchs, - OffloadingPrefix), + UA, + GetNamedOutputPath(C, *UA, BaseInput, UI.DependentBoundArch, + /*AtTopLevel=*/false, + MultipleArchs || + UI.DependentOffloadKind == Action::OFK_HIP, + OffloadingPrefix), BaseInput); // Save the unbundling result. 
UnbundlingResults.push_back(CurI); Modified: cfe/trunk/lib/Driver/ToolChains/Clang.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Clang.cpp?rev=332121&r1=332120&r2=332121&view=diff == --- cfe/trunk/lib/Driver/ToolChains/Clang.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/Clang.cpp Fri May 11 12:02:18 2018 @@ -5542,6 +5542,10 @@ void OffloadBundler::ConstructJob(Compil Triples += Action::GetOffloadKindName(CurKind); Triples += '-'; Triples += CurTC->getTriple().normalize(); +if (CurKind == Action::OFK_HIP && CurDep->getOffloadingArch()) { + Triples += '-'; + Triples += CurDep->getOffloadingArch(); +} } CmdArgs.push_back(TCArgs.MakeArgString(Triples)); @@ -5611,6 +5615,11 @@ void OffloadBundler::ConstructJobMultipl Triples += Action::GetOffloadKindName(Dep.DependentOffloadKind); Triples += '-'; Triples += Dep.DependentToolChain->getTriple().normalize(); +if (Dep.DependentOffloadKind == Action::OFK_HIP && +!Dep.DependentBoundArch.empty()) { + Triples += '-'; + Triples += Dep.DependentBoundArch; +} } CmdArgs.push_back(TCArgs.MakeArgString(Triples)); Modified: cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp?rev=332121&r1=332120&r2=332121&view=diff == --- cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp (original) +++ cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp Fri May 11 12:02:18 2018 @@ -969,11 +969,11 @@ int main(int argc, const char **argv) { getOffloadKindAndTriple(Target, Kind, Triple); bool KindIsValid = !Kind.empty(); -KindIsValid = KindIsValid && - StringSwitch(Kind) - .Case("host", true) - .Case("openmp", true) - .Default(false); +KindIsValid = KindIsValid && StringSwitch(Kind) + .Case("host", true) + .Case("openmp", true) + .Case("hip", true) + .Default(false); bool TripleIsValid = !Triple.empty(); llvm::Triple T(Triple); ___ cfe-commits mailing list cfe-commits@lists.llvm.org 
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r332122 - [HIP] Diagnose unsupported host triple
Author: yaxunl Date: Fri May 11 12:14:34 2018 New Revision: 332122 URL: http://llvm.org/viewvc/llvm-project?rev=332122&view=rev Log: [HIP] Diagnose unsupported host triple Differential Revision: https://reviews.llvm.org/D46487 Modified: cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/test/Driver/cuda-bad-arch.cu Modified: cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td?rev=332122&r1=332121&r2=332122&view=diff == --- cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td (original) +++ cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td Fri May 11 12:14:34 2018 @@ -40,7 +40,7 @@ def err_drv_cuda_version_unsupported : E "but installation at %3 is %4. Use --cuda-path to specify a different CUDA " "install, pass a different GPU arch with --cuda-gpu-arch, or pass " "--no-cuda-version-check.">; -def err_drv_cuda_nvptx_host : Error<"unsupported use of NVPTX for host compilation.">; +def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">; def err_drv_invalid_thread_model_for_target : Error< "invalid thread model '%0' in '%1' for this target">; def err_drv_invalid_linker_name : Error< Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=332122&r1=332121&r2=332122&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Fri May 11 12:14:34 2018 @@ -2338,11 +2338,13 @@ class OffloadingActionBuilder final { const ToolChain *HostTC = C.getSingleOffloadToolChain(); assert(HostTC && "No toolchain for host compilation."); - if (HostTC->getTriple().isNVPTX()) { -// We do not support targeting NVPTX for host compilation. Throw + if (HostTC->getTriple().isNVPTX() || + HostTC->getTriple().getArch() == llvm::Triple::amdgcn) { +// We do not support targeting NVPTX/AMDGCN for host compilation. 
Throw // an error and abort pipeline construction early so we don't trip // asserts that assume device-side compilation. -C.getDriver().Diag(diag::err_drv_cuda_nvptx_host); +C.getDriver().Diag(diag::err_drv_cuda_host_arch) +<< HostTC->getTriple().getArchName(); return true; } Modified: cfe/trunk/test/Driver/cuda-bad-arch.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-bad-arch.cu?rev=332122&r1=332121&r2=332122&view=diff == --- cfe/trunk/test/Driver/cuda-bad-arch.cu (original) +++ cfe/trunk/test/Driver/cuda-bad-arch.cu Fri May 11 12:14:34 2018 @@ -2,6 +2,7 @@ // REQUIRES: clang-driver // REQUIRES: x86-registered-target // REQUIRES: nvptx-registered-target +// REQUIRES: amdgpu-registered-target // RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=compute_20 -c %s 2>&1 \ // RUN: | FileCheck -check-prefix BAD %s @@ -25,9 +26,12 @@ // RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \ // RUN: | FileCheck -check-prefix OK %s -// We don't allow using NVPTX for host compilation. +// We don't allow using NVPTX/AMDGCN for host compilation. // RUN: %clang -### --cuda-host-only -target nvptx-nvidia-cuda -c %s 2>&1 \ // RUN: | FileCheck -check-prefix HOST_NVPTX %s +// RUN: %clang -### --cuda-host-only -target amdgcn-amd-amdhsa -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix HOST_AMDGCN %s // OK-NOT: error: Unsupported CUDA gpu architecture -// HOST_NVPTX: error: unsupported use of NVPTX for host compilation. +// HOST_NVPTX: error: unsupported architecture 'nvptx' for host compilation. +// HOST_AMDGCN: error: unsupported architecture 'amdgcn' for host compilation. ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r332123 - [HIP] Set proper triple and offload kind for the toolchain
Author: yaxunl Date: Fri May 11 12:21:39 2018 New Revision: 332123 URL: http://llvm.org/viewvc/llvm-project?rev=332123&view=rev Log: [HIP] Set proper triple and offload kind for the toolchain Also introduce --hip-link option to indicate HIP for linking. Differential Revision: https://reviews.llvm.org/D46475 Added: cfe/trunk/test/Driver/Inputs/hip_multiple_inputs/ cfe/trunk/test/Driver/Inputs/hip_multiple_inputs/a.cu cfe/trunk/test/Driver/Inputs/hip_multiple_inputs/b.hip cfe/trunk/test/Driver/hip-inputs.hip Modified: cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td cfe/trunk/include/clang/Driver/Options.td cfe/trunk/include/clang/Driver/Types.h cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/lib/Driver/Types.cpp Modified: cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td?rev=332123&r1=332122&r2=332123&view=diff == --- cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td (original) +++ cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td Fri May 11 12:21:39 2018 @@ -41,6 +41,7 @@ def err_drv_cuda_version_unsupported : E "install, pass a different GPU arch with --cuda-gpu-arch, or pass " "--no-cuda-version-check.">; def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">; +def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">; def err_drv_invalid_thread_model_for_target : Error< "invalid thread model '%0' in '%1' for this target">; def err_drv_invalid_linker_name : Error< Modified: cfe/trunk/include/clang/Driver/Options.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=332123&r1=332122&r2=332123&view=diff == --- cfe/trunk/include/clang/Driver/Options.td (original) +++ cfe/trunk/include/clang/Driver/Options.td Fri May 11 12:21:39 2018 @@ -557,6 +557,8 @@ def no_cuda_include_ptx_EQ : Joined<["-- HelpText<"Do not include PTX for the follwing GPU architecture (e.g. 
sm_35) or 'all'. May be specified more than once.">; def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">, Flags<[DriverOption]>, HelpText<"CUDA GPU architecture (e.g. sm_35). May be specified more than once.">; +def hip_link : Flag<["--"], "hip-link">, + HelpText<"Link clang-offload-bundler bundles for HIP">; def no_cuda_gpu_arch_EQ : Joined<["--"], "no-cuda-gpu-arch=">, Flags<[DriverOption]>, HelpText<"Remove GPU architecture (e.g. sm_35) from the list of GPUs to compile for. " "'all' resets the list to its default value.">; Modified: cfe/trunk/include/clang/Driver/Types.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Types.h?rev=332123&r1=332122&r2=332123&view=diff == --- cfe/trunk/include/clang/Driver/Types.h (original) +++ cfe/trunk/include/clang/Driver/Types.h Fri May 11 12:21:39 2018 @@ -77,6 +77,9 @@ namespace types { /// isCuda - Is this a CUDA input. bool isCuda(ID Id); + /// isHIP - Is this a HIP input. + bool isHIP(ID Id); + /// isObjC - Is this an "ObjC" input (Obj-C and Obj-C++ sources and headers). bool isObjC(ID Id); Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=332123&r1=332122&r2=332123&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Fri May 11 12:21:39 2018 @@ -538,24 +538,46 @@ void Driver::CreateOffloadingDeviceToolC InputList &Inputs) { // - // CUDA + // CUDA/HIP // - // We need to generate a CUDA toolchain if any of the inputs has a CUDA type. - if (llvm::any_of(Inputs, [](std::pair &I) { + // We need to generate a CUDA toolchain if any of the inputs has a CUDA + // or HIP type. However, mixed CUDA/HIP compilation is not supported. 
+ bool IsCuda = + llvm::any_of(Inputs, [](std::pair &I) { return types::isCuda(I.first); - })) { + }); + bool IsHIP = + llvm::any_of(Inputs, + [](std::pair &I) { + return types::isHIP(I.first); + }) || + C.getInputArgs().hasArg(options::OPT_hip_link); + if (IsCuda && IsHIP) { +Diag(clang::diag::err_drv_mix_cuda_hip); +return; + } + if (IsCuda || IsHIP) { const ToolChain *HostTC = C.getSingleOffloadToolChain(); const llvm::Triple &HostTriple = HostTC->getTriple(); -llvm::Triple CudaTriple(HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" - : "nvptx-nvidia-cuda"); -// Use the CUDA and host triples as the key into t
r332279 - CodeGen: Emit string literal in constant address space
Author: yaxunl Date: Mon May 14 12:20:12 2018 New Revision: 332279 URL: http://llvm.org/viewvc/llvm-project?rev=332279&view=rev Log: CodeGen: Emit string literal in constant address space Some targets have a constant address space (e.g. amdgcn). For them, string literals should be emitted in the constant address space and then cast to the default address space. Differential Revision: https://reviews.llvm.org/D46643 Added: cfe/trunk/test/CodeGenCXX/amdgcn-string-literal.cpp Modified: cfe/trunk/lib/CodeGen/CGDecl.cpp cfe/trunk/lib/CodeGen/CodeGenModule.cpp cfe/trunk/lib/CodeGen/CodeGenModule.h Modified: cfe/trunk/lib/CodeGen/CGDecl.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGDecl.cpp?rev=332279&r1=332278&r2=332279&view=diff == --- cfe/trunk/lib/CodeGen/CGDecl.cpp (original) +++ cfe/trunk/lib/CodeGen/CGDecl.cpp Mon May 14 12:20:12 2018 @@ -1374,7 +1374,7 @@ void CodeGenFunction::EmitAutoVarInit(co llvm::ConstantInt::get(IntPtrTy, getContext().getTypeSizeInChars(type).getQuantity()); - llvm::Type *BP = AllocaInt8PtrTy; + llvm::Type *BP = CGM.Int8Ty->getPointerTo(Loc.getAddressSpace()); if (Loc.getType() != BP) Loc = Builder.CreateBitCast(Loc, BP); @@ -1395,11 +1395,10 @@ void CodeGenFunction::EmitAutoVarInit(co // Otherwise, create a temporary global with the initializer then // memcpy from the global to the alloca. 
std::string Name = getStaticDeclName(CGM, D); -unsigned AS = 0; -if (getLangOpts().OpenCL) { - AS = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant); - BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS); -} +unsigned AS = CGM.getContext().getTargetAddressSpace( +CGM.getStringLiteralAddressSpace()); +BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS); + llvm::GlobalVariable *GV = new llvm::GlobalVariable(CGM.getModule(), constant->getType(), true, llvm::GlobalValue::PrivateLinkage, Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=332279&r1=332278&r2=332279&view=diff == --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original) +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Mon May 14 12:20:12 2018 @@ -3044,6 +3044,39 @@ LangAS CodeGenModule::GetGlobalVarAddres return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D); } +LangAS CodeGenModule::getStringLiteralAddressSpace() const { + // OpenCL v1.2 s6.5.3: a string literal is in the constant address space. + if (LangOpts.OpenCL) +return LangAS::opencl_constant; + if (auto AS = getTarget().getConstantAddressSpace()) +return AS.getValue(); + return LangAS::Default; +} + +// In address space agnostic languages, string literals are in default address +// space in AST. However, certain targets (e.g. amdgcn) request them to be +// emitted in constant address space in LLVM IR. To be consistent with other +// parts of AST, string literal global variables in constant address space +// need to be casted to default address space before being put into address +// map and referenced by other part of CodeGen. +// In OpenCL, string literals are in constant address space in AST, therefore +// they should not be casted to default address space. 
+static llvm::Constant * +castStringLiteralToDefaultAddressSpace(CodeGenModule &CGM, + llvm::GlobalVariable *GV) { + llvm::Constant *Cast = GV; + if (!CGM.getLangOpts().OpenCL) { +if (auto AS = CGM.getTarget().getConstantAddressSpace()) { + if (AS != LangAS::Default) +Cast = CGM.getTargetCodeGenInfo().performAddrSpaceCast( +CGM, GV, AS.getValue(), LangAS::Default, +GV->getValueType()->getPointerTo( +CGM.getContext().getTargetAddressSpace(LangAS::Default))); +} + } + return Cast; +} + template void CodeGenModule::MaybeHandleStaticInExternC(const SomeDecl *D, llvm::GlobalValue *GV) { @@ -4039,10 +4072,8 @@ static llvm::GlobalVariable * GenerateStringLiteral(llvm::Constant *C, llvm::GlobalValue::LinkageTypes LT, CodeGenModule &CGM, StringRef GlobalName, CharUnits Alignment) { - // OpenCL v1.2 s6.5.3: a string literal is in the constant address space. - unsigned AddrSpace = 0; - if (CGM.getLangOpts().OpenCL) -AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant); + unsigned AddrSpace = CGM.getContext().getTargetAddressSpace( + CGM.getStringLiteralAddressSpace()); llvm::Module &M = CGM.getModule(); // Create a global variable for this string @@ -4104,7 +4135,9 @@ CodeGenModule::GetAddrOfConstantStringFr SanitizerMD->reportGlobal
r332593 - CodeGen: Fix invalid bitcast for lifetime.start/end
Author: yaxunl Date: Thu May 17 04:16:35 2018 New Revision: 332593 URL: http://llvm.org/viewvc/llvm-project?rev=332593&view=rev Log: CodeGen: Fix invalid bitcast for lifetime.start/end lifetime.start/end expect a pointer argument in the alloca address space. However, in C++ a temporary variable is in the default address space. This patch changes the APIs CreateMemTemp and CreateTempAlloca to return the original alloca instruction, which is then passed to lifetime.start/end. It only affects targets with a non-zero alloca address space. Differential Revision: https://reviews.llvm.org/D45900 Added: cfe/trunk/test/CodeGenCXX/amdgcn_declspec_get.cpp Modified: cfe/trunk/lib/CodeGen/CGCall.cpp cfe/trunk/lib/CodeGen/CGDecl.cpp cfe/trunk/lib/CodeGen/CGExpr.cpp cfe/trunk/lib/CodeGen/CGExprAgg.cpp cfe/trunk/lib/CodeGen/CodeGenFunction.h Modified: cfe/trunk/lib/CodeGen/CGCall.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=332593&r1=332592&r2=332593&view=diff == --- cfe/trunk/lib/CodeGen/CGCall.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCall.cpp Thu May 17 04:16:35 2018 @@ -3812,16 +3812,17 @@ RValue CodeGenFunction::EmitCall(const C // If the call returns a temporary with struct return, create a temporary // alloca to hold the result, unless one is given to us. 
Address SRetPtr = Address::invalid(); + Address SRetAlloca = Address::invalid(); llvm::Value *UnusedReturnSizePtr = nullptr; if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) { if (!ReturnValue.isNull()) { SRetPtr = ReturnValue.getValue(); } else { - SRetPtr = CreateMemTemp(RetTy); + SRetPtr = CreateMemTemp(RetTy, "tmp", &SRetAlloca); if (HaveInsertPoint() && ReturnValue.isUnused()) { uint64_t size = CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(RetTy)); -UnusedReturnSizePtr = EmitLifetimeStart(size, SRetPtr.getPointer()); +UnusedReturnSizePtr = EmitLifetimeStart(size, SRetAlloca.getPointer()); } } if (IRFunctionArgs.hasSRetArg()) { @@ -3888,7 +3889,8 @@ RValue CodeGenFunction::EmitCall(const C if (!I->isAggregate()) { // Make a temporary alloca to pass the argument. Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), - "indirect-arg-temp", false); + "indirect-arg-temp", /*Alloca=*/nullptr, + /*Cast=*/false); IRCallArgs[FirstIRArg] = Addr.getPointer(); I->copyInto(*this, Addr); @@ -3934,7 +3936,8 @@ RValue CodeGenFunction::EmitCall(const C if (NeedCopy) { // Create an aligned temporary, and copy to it. Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), - "byval-temp", false); + "byval-temp", /*Alloca=*/nullptr, + /*Cast=*/false); IRCallArgs[FirstIRArg] = AI.getPointer(); I->copyInto(*this, AI); } else { @@ -4062,6 +4065,7 @@ RValue CodeGenFunction::EmitCall(const C llvm::Value *tempSize = nullptr; Address addr = Address::invalid(); + Address AllocaAddr = Address::invalid(); if (I->isAggregate()) { addr = I->hasLValue() ? I->getKnownLValue().getAddress() : I->getKnownRValue().getAggregateAddress(); @@ -4076,9 +4080,11 @@ RValue CodeGenFunction::EmitCall(const C // Materialize to a temporary. 
addr = CreateTempAlloca(RV.getScalarVal()->getType(), - CharUnits::fromQuantity(std::max(layout->getAlignment(), - scalarAlign))); -tempSize = EmitLifetimeStart(scalarSize, addr.getPointer()); +CharUnits::fromQuantity(std::max( +layout->getAlignment(), scalarAlign)), +"tmp", +/*ArraySize=*/nullptr, &AllocaAddr); +tempSize = EmitLifetimeStart(scalarSize, AllocaAddr.getPointer()); Builder.CreateStore(RV.getScalarVal(), addr); } @@ -4096,7 +4102,7 @@ RValue CodeGenFunction::EmitCall(const C assert(IRArgPos == FirstIRArg + NumIRArgs); if (tempSize) { -EmitLifetimeEnd(tempSize, addr.getPointer()); +EmitLifetimeEnd(tempSize, AllocaAddr.getPointer()); } break; @@ -4258,7 +4264,7 @@ RValue CodeGenFunction::EmitCall(const C // pop this cleanup later on. Being eager about this is OK, since this // temporary is 'invisible' outside of the callee. if (UnusedReturnSizePtr) -pushFullExprCleanup(NormalEHLifetimeMarker, SRetPtr, +pushFullExprCleanup(NormalEHLifetimeMarker, SRetAlloca, Unuse
r332724 - [HIP] Support offloading by linker script
Author: yaxunl Date: Fri May 18 08:07:56 2018 New Revision: 332724 URL: http://llvm.org/viewvc/llvm-project?rev=332724&view=rev Log: [HIP] Support offloading by linker script To support linking device code in different source files, it is necessary to embed the fat binary at the host linking stage. This patch emits an external symbol for the fat binary in host codegen, then has lld embed the fat binary through a linker script. Differential Revision: https://reviews.llvm.org/D46472 Modified: cfe/trunk/include/clang/Driver/Options.td cfe/trunk/lib/CodeGen/CGCUDANV.cpp cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp cfe/trunk/lib/Driver/ToolChains/CommonArgs.h cfe/trunk/lib/Driver/ToolChains/Gnu.cpp cfe/trunk/test/CodeGenCUDA/device-stub.cu Modified: cfe/trunk/include/clang/Driver/Options.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=332724&r1=332723&r2=332724&view=diff == --- cfe/trunk/include/clang/Driver/Options.td (original) +++ cfe/trunk/include/clang/Driver/Options.td Fri May 18 08:07:56 2018 @@ -586,6 +586,8 @@ def fno_cuda_rdc : Flag<["-"], "fno-cuda def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>, HelpText<"Use 32-bit pointers for accessing const/local/shared address spaces.">; def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">; +def fhip_dump_offload_linker_script : Flag<["-"], "fhip-dump-offload-linker-script">, + Group, Flags<[NoArgumentUnused, HelpHidden]>; def dA : Flag<["-"], "dA">, Group; def dD : Flag<["-"], "dD">, Group, Flags<[CC1Option]>, HelpText<"Print macro definitions in -E mode in addition to normal output">; Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=332724&r1=332723&r2=332724&view=diff == --- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Fri May 18 08:07:56 2018 @@ -27,6 +27,8 @@ using namespace clang; using namespace CodeGen; namespace { +constexpr unsigned 
CudaFatMagic = 0x466243b1; +constexpr unsigned HIPFatMagic = 0x48495046; // "HIPF" class CGNVCUDARuntime : public CGCUDARuntime { @@ -310,19 +312,20 @@ llvm::Function *CGNVCUDARuntime::makeReg /// } /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { + bool IsHIP = CGM.getLangOpts().HIP; // No need to generate ctors/dtors if there is no GPU binary. - std::string GpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName; - if (GpuBinaryFileName.empty()) + StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName; + if (CudaGpuBinaryFileName.empty() && !IsHIP) return nullptr; - // void __cuda_register_globals(void* handle); + // void __{cuda|hip}_register_globals(void* handle); llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn(); // We always need a function to pass in as callback. Create a dummy // implementation if we don't need to register anything. if (RelocatableDeviceCode && !RegisterGlobalsFunc) RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy()); - // void ** __cudaRegisterFatBinary(void *); + // void ** __{cuda|hip}RegisterFatBinary(void *); llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false), addUnderscoredPrefixToName("RegisterFatBinary")); @@ -334,12 +337,16 @@ llvm::Function *CGNVCUDARuntime::makeMod // global variable and save a reference in GpuBinaryHandle to be cleaned up // in destructor on exit. Then associate all known kernels with the GPU binary // handle so CUDA runtime can figure out what to call on the GPU side. 
- llvm::ErrorOr> GpuBinaryOrErr = - llvm::MemoryBuffer::getFileOrSTDIN(GpuBinaryFileName); - if (std::error_code EC = GpuBinaryOrErr.getError()) { -CGM.getDiags().Report(diag::err_cannot_open_file) -<< GpuBinaryFileName << EC.message(); -return nullptr; + std::unique_ptr CudaGpuBinary; + if (!IsHIP) { +llvm::ErrorOr> CudaGpuBinaryOrErr = +llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName); +if (std::error_code EC = CudaGpuBinaryOrErr.getError()) { + CGM.getDiags().Report(diag::err_cannot_open_file) + << CudaGpuBinaryFileName << EC.message(); + return nullptr; +} +CudaGpuBinary = std::move(CudaGpuBinaryOrErr.get()); } llvm::Function *ModuleCtorFunc = llvm::Function::Create( @@ -353,28 +360,60 @@ llvm::Function *CGNVCUDARuntime::makeMod CtorBuilder.SetInsertPoint(CtorEntryBB); const char *FatbinConstantName; - if (RelocatableDeviceCode) + const char *FatbinSectionName; + const char *ModuleIDSectionName; + StringRef ModuleIDPrefix; + llvm::Constant *FatBinStr; + unsigned FatMagic; + if (IsHIP) { +FatbinConstantName = "
r332982 - Call CreateTempMemWithoutCast for ActiveFlag
Author: yaxunl Date: Tue May 22 07:36:26 2018 New Revision: 332982 URL: http://llvm.org/viewvc/llvm-project?rev=332982&view=rev Log: Call CreateTempMemWithoutCast for ActiveFlag Introduced CreateMemTempWithoutCast and CreateTempAllocaWithoutCast to emit an alloca without casting it to the default address space. ActiveFlag is a temporary variable emitted for cleanup. It is defined with type AllocaInst* and there is a cast to AllocaInst in SetActiveFlag. An alloca cast to a generic pointer triggers an assertion in SetActiveFlag. Since ActiveFlag is only ever loaded from and stored to, it is safe to use the original alloca; therefore CreateMemTempWithoutCast is called. Differential Revision: https://reviews.llvm.org/D47099 Modified: cfe/trunk/lib/CodeGen/CGCall.cpp cfe/trunk/lib/CodeGen/CGCleanup.cpp cfe/trunk/lib/CodeGen/CGExpr.cpp cfe/trunk/lib/CodeGen/CodeGenFunction.h cfe/trunk/test/CodeGenCXX/conditional-temporaries.cpp Modified: cfe/trunk/lib/CodeGen/CGCall.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=332982&r1=332981&r2=332982&view=diff == --- cfe/trunk/lib/CodeGen/CGCall.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCall.cpp Tue May 22 07:36:26 2018 @@ -3888,9 +3888,8 @@ RValue CodeGenFunction::EmitCall(const C assert(NumIRArgs == 1); if (!I->isAggregate()) { // Make a temporary alloca to pass the argument. -Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), - "indirect-arg-temp", /*Alloca=*/nullptr, - /*Cast=*/false); +Address Addr = CreateMemTempWithoutCast( +I->Ty, ArgInfo.getIndirectAlign(), "indirect-arg-temp"); IRCallArgs[FirstIRArg] = Addr.getPointer(); I->copyInto(*this, Addr); @@ -3935,9 +3934,8 @@ RValue CodeGenFunction::EmitCall(const C } if (NeedCopy) { // Create an aligned temporary, and copy to it. 
- Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), - "byval-temp", /*Alloca=*/nullptr, - /*Cast=*/false); + Address AI = CreateMemTempWithoutCast( + I->Ty, ArgInfo.getIndirectAlign(), "byval-temp"); IRCallArgs[FirstIRArg] = AI.getPointer(); I->copyInto(*this, AI); } else { Modified: cfe/trunk/lib/CodeGen/CGCleanup.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCleanup.cpp?rev=332982&r1=332981&r2=332982&view=diff == --- cfe/trunk/lib/CodeGen/CGCleanup.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCleanup.cpp Tue May 22 07:36:26 2018 @@ -283,8 +283,8 @@ void EHScopeStack::popNullFixups() { void CodeGenFunction::initFullExprCleanup() { // Create a variable to decide whether the cleanup needs to be run. - Address active = CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(), -"cleanup.cond"); + Address active = CreateTempAllocaWithoutCast( + Builder.getInt1Ty(), CharUnits::One(), "cleanup.cond"); // Initialize it to false at a site that's guaranteed to be run // before each evaluation. Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=332982&r1=332981&r2=332982&view=diff == --- cfe/trunk/lib/CodeGen/CGExpr.cpp (original) +++ cfe/trunk/lib/CodeGen/CGExpr.cpp Tue May 22 07:36:26 2018 @@ -61,21 +61,30 @@ llvm::Value *CodeGenFunction::EmitCastTo /// CreateTempAlloca - This creates a alloca and inserts it into the entry /// block. +Address CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty, + CharUnits Align, + const Twine &Name, + llvm::Value *ArraySize) { + auto Alloca = CreateTempAlloca(Ty, Name, ArraySize); + Alloca->setAlignment(Align.getQuantity()); + return Address(Alloca, Align); +} + +/// CreateTempAlloca - This creates a alloca and inserts it into the entry +/// block. The alloca is casted to default address space if necessary. 
Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, const Twine &Name, llvm::Value *ArraySize, - Address *AllocaAddr, - bool CastToDefaultAddrSpace) { - auto Alloca = CreateTempAlloca(Ty, Name, ArraySize); - Alloca->setAlignment(Align.getQuantity()); + Address *AllocaAddr) { + auto Alloca = CreateTempAllocaWithoutCast(Ty, Al
r332991 - Revert r332982 Call CreateTempMemWithoutCast for ActiveFlag
Author: yaxunl Date: Tue May 22 09:13:07 2018 New Revision: 332991 URL: http://llvm.org/viewvc/llvm-project?rev=332991&view=rev Log: Revert r332982 Call CreateTempMemWithoutCast for ActiveFlag Due to regression on arm. Modified: cfe/trunk/lib/CodeGen/CGCall.cpp cfe/trunk/lib/CodeGen/CGCleanup.cpp cfe/trunk/lib/CodeGen/CGExpr.cpp cfe/trunk/lib/CodeGen/CodeGenFunction.h cfe/trunk/test/CodeGenCXX/conditional-temporaries.cpp Modified: cfe/trunk/lib/CodeGen/CGCall.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=332991&r1=332990&r2=332991&view=diff == --- cfe/trunk/lib/CodeGen/CGCall.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCall.cpp Tue May 22 09:13:07 2018 @@ -3888,8 +3888,9 @@ RValue CodeGenFunction::EmitCall(const C assert(NumIRArgs == 1); if (!I->isAggregate()) { // Make a temporary alloca to pass the argument. -Address Addr = CreateMemTempWithoutCast( -I->Ty, ArgInfo.getIndirectAlign(), "indirect-arg-temp"); +Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), + "indirect-arg-temp", /*Alloca=*/nullptr, + /*Cast=*/false); IRCallArgs[FirstIRArg] = Addr.getPointer(); I->copyInto(*this, Addr); @@ -3934,8 +3935,9 @@ RValue CodeGenFunction::EmitCall(const C } if (NeedCopy) { // Create an aligned temporary, and copy to it. 
- Address AI = CreateMemTempWithoutCast( - I->Ty, ArgInfo.getIndirectAlign(), "byval-temp"); + Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), + "byval-temp", /*Alloca=*/nullptr, + /*Cast=*/false); IRCallArgs[FirstIRArg] = AI.getPointer(); I->copyInto(*this, AI); } else { Modified: cfe/trunk/lib/CodeGen/CGCleanup.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCleanup.cpp?rev=332991&r1=332990&r2=332991&view=diff == --- cfe/trunk/lib/CodeGen/CGCleanup.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCleanup.cpp Tue May 22 09:13:07 2018 @@ -283,8 +283,8 @@ void EHScopeStack::popNullFixups() { void CodeGenFunction::initFullExprCleanup() { // Create a variable to decide whether the cleanup needs to be run. - Address active = CreateTempAllocaWithoutCast( - Builder.getInt1Ty(), CharUnits::One(), "cleanup.cond"); + Address active = CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(), +"cleanup.cond"); // Initialize it to false at a site that's guaranteed to be run // before each evaluation. Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=332991&r1=332990&r2=332991&view=diff == --- cfe/trunk/lib/CodeGen/CGExpr.cpp (original) +++ cfe/trunk/lib/CodeGen/CGExpr.cpp Tue May 22 09:13:07 2018 @@ -61,30 +61,21 @@ llvm::Value *CodeGenFunction::EmitCastTo /// CreateTempAlloca - This creates a alloca and inserts it into the entry /// block. -Address CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty, - CharUnits Align, - const Twine &Name, - llvm::Value *ArraySize) { - auto Alloca = CreateTempAlloca(Ty, Name, ArraySize); - Alloca->setAlignment(Align.getQuantity()); - return Address(Alloca, Align); -} - -/// CreateTempAlloca - This creates a alloca and inserts it into the entry -/// block. The alloca is casted to default address space if necessary. 
Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, const Twine &Name, llvm::Value *ArraySize, - Address *AllocaAddr) { - auto Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize); + Address *AllocaAddr, + bool CastToDefaultAddrSpace) { + auto Alloca = CreateTempAlloca(Ty, Name, ArraySize); + Alloca->setAlignment(Align.getQuantity()); if (AllocaAddr) -*AllocaAddr = Alloca; - llvm::Value *V = Alloca.getPointer(); +*AllocaAddr = Address(Alloca, Align); + llvm::Value *V = Alloca; // Alloca always returns a pointer in alloca address space, which may // be different from the type defined by the language. For example, // in C++ the auto variables are in the default address space. Therefore // cast alloca to the default address space when necessary. - if (getASTAllocaAddressSpace()
r333483 - Add action builder for HIP
Author: yaxunl Date: Tue May 29 17:49:10 2018 New Revision: 333483 URL: http://llvm.org/viewvc/llvm-project?rev=333483&view=rev Log: Add action builder for HIP To support separate compile/link and linking across device IR in different source files, a new HIP action builder is introduced. Basically it compiles/links host and device code separately, and embed fat binary in host linking stage through linker script. Differential Revision: https://reviews.llvm.org/D46476 Modified: cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/test/Driver/cuda-phases.cu Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=333483&r1=333482&r2=333483&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Tue May 29 17:49:10 2018 @@ -2151,9 +2151,10 @@ class OffloadingActionBuilder final { } }; - /// CUDA action builder. It injects device code in the host backend - /// action. - class CudaActionBuilder final : public DeviceActionBuilder { + /// Base class for CUDA/HIP action builder. It injects device code in + /// the host backend action. + class CudaActionBuilderBase : public DeviceActionBuilder { + protected: /// Flags to signal if the user requested host-only or device-only /// compilation. bool CompileHostOnly = false; @@ -2170,115 +2171,11 @@ class OffloadingActionBuilder final { /// Flag that is set to true if this builder acted on the current input. bool IsActive = false; - public: -CudaActionBuilder(Compilation &C, DerivedArgList &Args, - const Driver::InputList &Inputs) -: DeviceActionBuilder(C, Args, Inputs, Action::OFK_Cuda) {} - -ActionBuilderReturnCode -getDeviceDependences(OffloadAction::DeviceDependences &DA, - phases::ID CurPhase, phases::ID FinalPhase, - PhasesTy &Phases) override { - if (!IsActive) -return ABRT_Inactive; - - // If we don't have more CUDA actions, we don't have any dependences to - // create for the host. 
- if (CudaDeviceActions.empty()) -return ABRT_Success; - - assert(CudaDeviceActions.size() == GpuArchList.size() && - "Expecting one action per GPU architecture."); - assert(!CompileHostOnly && - "Not expecting CUDA actions in host-only compilation."); - - // If we are generating code for the device or we are in a backend phase, - // we attempt to generate the fat binary. We compile each arch to ptx and - // assemble to cubin, then feed the cubin *and* the ptx into a device - // "link" action, which uses fatbinary to combine these cubins into one - // fatbin. The fatbin is then an input to the host action if not in - // device-only mode. - if (CompileDeviceOnly || CurPhase == phases::Backend) { -ActionList DeviceActions; -for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { - // Produce the device action from the current phase up to the assemble - // phase. - for (auto Ph : Phases) { -// Skip the phases that were already dealt with. -if (Ph < CurPhase) - continue; -// We have to be consistent with the host final phase. -if (Ph > FinalPhase) - break; - -CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction( -C, Args, Ph, CudaDeviceActions[I], Action::OFK_Cuda); - -if (Ph == phases::Assemble) - break; - } - - // If we didn't reach the assemble phase, we can't generate the fat - // binary. We don't need to generate the fat binary if we are not in - // device-only mode. 
- if (!isa(CudaDeviceActions[I]) || - CompileDeviceOnly) -continue; - - Action *AssembleAction = CudaDeviceActions[I]; - assert(AssembleAction->getType() == types::TY_Object); - assert(AssembleAction->getInputs().size() == 1); - - Action *BackendAction = AssembleAction->getInputs()[0]; - assert(BackendAction->getType() == types::TY_PP_Asm); - - for (auto &A : {AssembleAction, BackendAction}) { -OffloadAction::DeviceDependences DDep; -DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]), - Action::OFK_Cuda); -DeviceActions.push_back( -C.MakeAction(DDep, A->getType())); - } -} - -// We generate the fat binary if we have device input actions. -if (!DeviceActions.empty()) { - CudaFatBinary = - C.MakeAction(DeviceActions, types::TY_CUDA_FATBIN); - - if (!CompileDeviceOnly) { -DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr, -
r333484 - Add HIP toolchain
Author: yaxunl Date: Tue May 29 17:53:50 2018 New Revision: 333484 URL: http://llvm.org/viewvc/llvm-project?rev=333484&view=rev Log: Add HIP toolchain This patch adds HIP toolchain to support HIP language mode. It includes: Create specific compiler jobs for HIP. Choose specific libraries for HIP. With contribution from Greg Rodgers. Differential Revision: https://reviews.llvm.org/D45212 Added: cfe/trunk/lib/Driver/ToolChains/HIP.cpp cfe/trunk/lib/Driver/ToolChains/HIP.h cfe/trunk/test/Driver/Inputs/hip_multiple_inputs/lib1/ cfe/trunk/test/Driver/Inputs/hip_multiple_inputs/lib1/lib1.bc cfe/trunk/test/Driver/Inputs/hip_multiple_inputs/lib2/ cfe/trunk/test/Driver/Inputs/hip_multiple_inputs/lib2/lib2.bc cfe/trunk/test/Driver/hip-toolchain.hip Modified: cfe/trunk/include/clang/Driver/Options.td cfe/trunk/lib/Driver/CMakeLists.txt cfe/trunk/lib/Driver/Driver.cpp Modified: cfe/trunk/include/clang/Driver/Options.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=333484&r1=333483&r2=333484&view=diff == --- cfe/trunk/include/clang/Driver/Options.td (original) +++ cfe/trunk/include/clang/Driver/Options.td Tue May 29 17:53:50 2018 @@ -588,6 +588,10 @@ def fno_cuda_rdc : Flag<["-"], "fno-cuda def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>, HelpText<"Use 32-bit pointers for accessing const/local/shared address spaces.">; def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">; +def hip_device_lib_path_EQ : Joined<["--"], "hip-device-lib-path=">, Group, + HelpText<"HIP device library path">; +def hip_device_lib_EQ : Joined<["--"], "hip-device-lib=">, Group, + HelpText<"HIP device library">; def fhip_dump_offload_linker_script : Flag<["-"], "fhip-dump-offload-linker-script">, Group, Flags<[NoArgumentUnused, HelpHidden]>; def dA : Flag<["-"], "dA">, Group; Modified: cfe/trunk/lib/Driver/CMakeLists.txt URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/CMakeLists.txt?rev=333484&r1=333483&r2=333484&view=diff == --- cfe/trunk/lib/Driver/CMakeLists.txt (original) +++ cfe/trunk/lib/Driver/CMakeLists.txt Tue May 29 17:53:50 2018 @@ -45,6 +45,7 @@ add_clang_library(clangDriver ToolChains/Fuchsia.cpp ToolChains/Gnu.cpp ToolChains/Haiku.cpp + ToolChains/HIP.cpp ToolChains/Hexagon.cpp ToolChains/Linux.cpp ToolChains/MipsLinux.cpp Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=333484&r1=333483&r2=333484&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Tue May 29 17:53:50 2018 @@ -12,6 +12,7 @@ #include "ToolChains/AMDGPU.h" #include "ToolChains/AVR.h" #include "ToolChains/Ananas.h" +#include "ToolChains/BareMetal.h" #include "ToolChains/Clang.h" #include "ToolChains/CloudABI.h" #include "ToolChains/Contiki.h" @@ -22,15 +23,15 @@ #include "ToolChains/FreeBSD.h" #include "ToolChains/Fuchsia.h" #include "ToolChains/Gnu.h" -#include "ToolChains/BareMetal.h" +#include "ToolChains/HIP.h" #include "ToolChains/Haiku.h" #include "ToolChains/Hexagon.h" #include "ToolChains/Lanai.h" #include "ToolChains/Linux.h" +#include "ToolChains/MSVC.h" #include "ToolChains/MinGW.h" #include "ToolChains/Minix.h" #include "ToolChains/MipsLinux.h" -#include "ToolChains/MSVC.h" #include "ToolChains/Myriad.h" #include "ToolChains/NaCl.h" #include "ToolChains/NetBSD.h" @@ -70,9 +71,9 @@ #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Process.h" #include "llvm/Support/Program.h" +#include "llvm/Support/StringSaver.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/StringSaver.h" #include #include #include @@ -540,7 +541,7 @@ void Driver::CreateOffloadingDeviceToolC // // CUDA/HIP // - // We need to generate a CUDA toolchain if any of the inputs has a CUDA + // We need to generate a CUDA/HIP toolchain if any 
of the inputs has a CUDA // or HIP type. However, mixed CUDA/HIP compilation is not supported. bool IsCuda = llvm::any_of(Inputs, [](std::pair &I) { @@ -556,21 +557,15 @@ void Driver::CreateOffloadingDeviceToolC Diag(clang::diag::err_drv_mix_cuda_hip); return; } - if (IsCuda || IsHIP) { + if (IsCuda) { const ToolChain *HostTC = C.getSingleOffloadToolChain(); const llvm::Triple &HostTriple = HostTC->getTriple(); StringRef DeviceTripleStr; -auto OFK = IsHIP ? Action::OFK_HIP : Action::OFK_Cuda; -if (IsHIP) { - // HIP is only supported on amdgcn. - DeviceTripleStr = "amdgcn-amd-amdhsa"; -} else { - // CUDA is only supported on nvptx. - DeviceTripleStr = HostTriple.isArch64
r314452 - [AMDGPU] Allow flexible register names in inline asm constraints
Author: yaxunl Date: Thu Sep 28 12:07:59 2017 New Revision: 314452 URL: http://llvm.org/viewvc/llvm-project?rev=314452&view=rev Log: [AMDGPU] Allow flexible register names in inline asm constraints Currently AMDGPU inline asm only allows v and s as register names in constraints. This patch allows the following register names in constraints (n and m are unsigned integers, n < m): v s {vn} or {v[n]} {sn} or {s[n]} {S}, where S is a special register name {v[n:m]} {s[n:m]} Differential Revision: https://reviews.llvm.org/D37568 Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.h cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.h?rev=314452&r1=314451&r2=314452&view=diff == --- cfe/trunk/lib/Basic/Targets/AMDGPU.h (original) +++ cfe/trunk/lib/Basic/Targets/AMDGPU.h Thu Sep 28 12:07:59 2017 @@ -17,6 +17,7 @@ #include "clang/AST/Type.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" +#include "llvm/ADT/StringSet.h" #include "llvm/ADT/Triple.h" #include "llvm/Support/Compiler.h" @@ -115,17 +116,83 @@ public: return None; } + /// Accepted register names: (n, m is unsigned integer, n < m) + /// v + /// s + /// {vn}, {v[n]} + /// {sn}, {s[n]} + /// {S} , where S is a special register name + {v[n:m]} + /// {s[n:m]} bool validateAsmConstraint(const char *&Name, TargetInfo::ConstraintInfo &Info) const override { -switch (*Name) { -default: - break; -case 'v': // vgpr -case 's': // sgpr +static const ::llvm::StringSet<> SpecialRegs({ +"exec", "vcc", "flat_scratch", "m0", "scc", "tba", "tma", +"flat_scratch_lo", "flat_scratch_hi", "vcc_lo", "vcc_hi", "exec_lo", +"exec_hi", "tma_lo", "tma_hi", "tba_lo", "tba_hi", +}); + +StringRef S(Name); +bool HasLeftParen = false; +if (S.front() == '{') { + HasLeftParen = true; + S = S.drop_front(); +} +if (S.empty()) + return false; +if (S.front() != 'v' && S.front() != 's') { + if (!HasLeftParen) +return 
false; + auto E = S.find('}'); + if (!SpecialRegs.count(S.substr(0, E))) +return false; + S = S.drop_front(E + 1); + if (!S.empty()) +return false; + // Found {S} where S is a special register. + Info.setAllowsRegister(); + Name = S.data() - 1; + return true; +} +S = S.drop_front(); +if (!HasLeftParen) { + if (!S.empty()) +return false; + // Found s or v. Info.setAllowsRegister(); + Name = S.data() - 1; return true; } -return false; +bool HasLeftBracket = false; +if (!S.empty() && S.front() == '[') { + HasLeftBracket = true; + S = S.drop_front(); +} +unsigned long long N; +if (S.empty() || consumeUnsignedInteger(S, 10, N)) + return false; +if (!S.empty() && S.front() == ':') { + if (!HasLeftBracket) +return false; + S = S.drop_front(); + unsigned long long M; + if (consumeUnsignedInteger(S, 10, M) || N >= M) +return false; +} +if (HasLeftBracket) { + if (S.empty() || S.front() != ']') +return false; + S = S.drop_front(); +} +if (S.empty() || S.front() != '}') + return false; +S = S.drop_front(); +if (!S.empty()) + return false; +// Found {vn}, {sn}, {v[n]}, {s[n]}, {v[n:m]}, or {s[n:m]}. 
+Info.setAllowsRegister(); +Name = S.data() - 1; +return true; } bool Modified: cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl?rev=314452&r1=314451&r2=314452&view=diff == --- cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl (original) +++ cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl Thu Sep 28 12:07:59 2017 @@ -1,6 +1,7 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -x cl -triple amdgcn -fsyntax-only %s -// expected-no-diagnostics +// RUN: %clang_cc1 -triple amdgcn -fsyntax-only -verify %s + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable kernel void test () { @@ -9,6 +10,67 @@ kernel void test () { // sgpr constraints __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "s" (imm) : ); + __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}" (imm) : ); + __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exe" (imm) : ); // expected-error {{invalid input constraint '{exe' in asm}} + __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec" (imm) : ); // expected-error {{invalid input constraint '{exec' in asm}} + __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}a" (imm) : ); // expected-error {{invalid input constraint '{exec}a' in asm}} +
r314802 - [OpenCL] Fix checking of vector type casting
Author: yaxunl Date: Tue Oct 3 07:34:29 2017 New Revision: 314802 URL: http://llvm.org/viewvc/llvm-project?rev=314802&view=rev Log: [OpenCL] Fix checking of vector type casting Currently clang allows the following code: int a; int b = (const int) a; However, it does not allow the following code: int4 a; int4 b = (const int4) a; This is because Clang compares the qualified types instead of the unqualified types for vector type casting, which causes the inconsistency. This patch fixes that. Differential Revision: https://reviews.llvm.org/D38463 Modified: cfe/trunk/lib/Sema/SemaExpr.cpp cfe/trunk/test/SemaOpenCL/vector_conv_invalid.cl Modified: cfe/trunk/lib/Sema/SemaExpr.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=314802&r1=314801&r2=314802&view=diff == --- cfe/trunk/lib/Sema/SemaExpr.cpp (original) +++ cfe/trunk/lib/Sema/SemaExpr.cpp Tue Oct 3 07:34:29 2017 @@ -6033,9 +6033,9 @@ ExprResult Sema::CheckExtVectorCast(Sour // In OpenCL, casts between vectors of different types are not allowed. // (See OpenCL 6.2). 
if (SrcTy->isVectorType()) { -if (!areLaxCompatibleVectorTypes(SrcTy, DestTy) -|| (getLangOpts().OpenCL && -(DestTy.getCanonicalType() != SrcTy.getCanonicalType( { +if (!areLaxCompatibleVectorTypes(SrcTy, DestTy) || +(getLangOpts().OpenCL && + !Context.hasSameUnqualifiedType(DestTy, SrcTy))) { Diag(R.getBegin(),diag::err_invalid_conversion_between_ext_vectors) << DestTy << SrcTy << R; return ExprError(); Modified: cfe/trunk/test/SemaOpenCL/vector_conv_invalid.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/vector_conv_invalid.cl?rev=314802&r1=314801&r2=314802&view=diff == --- cfe/trunk/test/SemaOpenCL/vector_conv_invalid.cl (original) +++ cfe/trunk/test/SemaOpenCL/vector_conv_invalid.cl Tue Oct 3 07:34:29 2017 @@ -5,10 +5,18 @@ typedef int int4 __attribute((ext_vector typedef int int3 __attribute((ext_vector_type(3))); typedef unsigned uint3 __attribute((ext_vector_type(3))); -void vector_conv_invalid() { +void vector_conv_invalid(const global int4 *const_global_ptr) { uint4 u = (uint4)(1); int4 i = u; // expected-error{{initializing 'int4' (vector of 4 'int' values) with an expression of incompatible type 'uint4' (vector of 4 'unsigned int' values)}} int4 e = (int4)u; // expected-error{{invalid conversion between ext-vector type 'int4' (vector of 4 'int' values) and 'uint4' (vector of 4 'unsigned int' values)}} uint3 u4 = (uint3)u; // expected-error{{invalid conversion between ext-vector type 'uint3' (vector of 3 'unsigned int' values) and 'uint4' (vector of 4 'unsigned int' values)}} + + e = (const int4)i; + e = (constant int4)i; + e = (private int4)i; + + private int4 *private_ptr = (const private int4 *)const_global_ptr; // expected-error{{casting 'const __global int4 *' to type 'const int4 *' changes address space of pointer}} + global int4 *global_ptr = const_global_ptr; // expected-warning {{initializing '__global int4 *' with an expression of type 'const __global int4 *' discards qualifiers}} + global_ptr = (global int4 
*)const_global_ptr; } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r314932 - [OpenCL] Clean up and add missing fields for block struct
Author: yaxunl Date: Wed Oct 4 13:32:17 2017 New Revision: 314932 URL: http://llvm.org/viewvc/llvm-project?rev=314932&view=rev Log: [OpenCL] Clean up and add missing fields for block struct Currently a block is translated to a structure equivalent to struct Block { void *isa; int flags; int reserved; void *invoke; void *descriptor; }; Except for invoke, which is the pointer to the block invoke function, all other fields are useless for OpenCL; they clutter the IR and also waste memory, since the block struct is passed to the block invoke function as an argument. On the other hand, the size and alignment of the block struct are not stored in the struct, which makes it difficult to implement __enqueue_kernel as a library function, since the library function needs to know the size and alignment of the argument which needs to be passed to the kernel. This patch removes the useless fields from the block struct and adds size and align fields. The equivalent block struct will become struct Block { int size; int align; generic void *invoke; /* custom fields */ }; It also changes the pointer to the invoke function to be a generic pointer since the address space of a function may not be private on certain targets. 
Differential Revision: https://reviews.llvm.org/D37822 Removed: cfe/trunk/test/CodeGen/blocks-opencl.cl Modified: cfe/trunk/lib/CodeGen/CGBlocks.cpp cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h cfe/trunk/lib/CodeGen/TargetInfo.h cfe/trunk/test/CodeGenOpenCL/blocks.cl cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl Modified: cfe/trunk/lib/CodeGen/CGBlocks.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBlocks.cpp?rev=314932&r1=314931&r2=314932&view=diff == --- cfe/trunk/lib/CodeGen/CGBlocks.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBlocks.cpp Wed Oct 4 13:32:17 2017 @@ -14,11 +14,13 @@ #include "CGBlocks.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" +#include "CGOpenCLRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" -#include "clang/CodeGen/ConstantInitBuilder.h" +#include "TargetInfo.h" #include "clang/AST/DeclObjC.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/SmallSet.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" @@ -302,21 +304,55 @@ static CharUnits getLowBit(CharUnits v) static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info, SmallVectorImpl &elementTypes) { - // The header is basically 'struct { void *; int; int; void *; void *; }'. - // Assert that that struct is packed. 
- assert(CGM.getIntSize() <= CGM.getPointerSize()); - assert(CGM.getIntAlign() <= CGM.getPointerAlign()); - assert((2 * CGM.getIntSize()).isMultipleOf(CGM.getPointerAlign())); - - info.BlockAlign = CGM.getPointerAlign(); - info.BlockSize = 3 * CGM.getPointerSize() + 2 * CGM.getIntSize(); assert(elementTypes.empty()); - elementTypes.push_back(CGM.VoidPtrTy); - elementTypes.push_back(CGM.IntTy); - elementTypes.push_back(CGM.IntTy); - elementTypes.push_back(CGM.VoidPtrTy); - elementTypes.push_back(CGM.getBlockDescriptorType()); + if (CGM.getLangOpts().OpenCL) { +// The header is basically 'struct { int; int; generic void *; +// custom_fields; }'. Assert that struct is packed. +auto GenPtrAlign = CharUnits::fromQuantity( +CGM.getTarget().getPointerAlign(LangAS::opencl_generic) / 8); +auto GenPtrSize = CharUnits::fromQuantity( +CGM.getTarget().getPointerWidth(LangAS::opencl_generic) / 8); +assert(CGM.getIntSize() <= GenPtrSize); +assert(CGM.getIntAlign() <= GenPtrAlign); +assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign)); +elementTypes.push_back(CGM.IntTy); /* total size */ +elementTypes.push_back(CGM.IntTy); /* align */ +elementTypes.push_back( +CGM.getOpenCLRuntime() +.getGenericVoidPointerType()); /* invoke function */ +unsigned Offset = +2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity(); +unsigned BlockAlign = GenPtrAlign.getQuantity(); +if (auto *Helper = +CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { + for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ { +// TargetOpenCLBlockHelp needs to make sure the struct is packed. +// If necessary, add padding fields to the custom fields. 
+unsigned Align = CGM.getDataLayout().getABITypeAlignment(I); +if (BlockAlign < Align) + BlockAlign = Align; +assert(Offset % Align == 0); +Offset += CGM.getDataLayout().getTypeAllocSize(I); +elementTypes.push_back(I); + } +} +info.BlockAlign = CharUnits::fromQuantity(BlockAlign); +info.BlockSize = CharUnits::fromQuantity(Offset); + } else { +// The header
r315668 - [OpenCL] Add LangAS::opencl_private to represent private address space in AST
Author: yaxunl Date: Thu Oct 12 20:37:48 2017 New Revision: 315668 URL: http://llvm.org/viewvc/llvm-project?rev=315668&view=rev Log: [OpenCL] Add LangAS::opencl_private to represent private address space in AST Currently Clang uses default address space (0) to represent private address space for OpenCL in AST. There are two issues with this: Multiple address spaces including private address space cannot be diagnosed. There is no mangling for default address space. For example, if private int* is emitted as i32 addrspace(5)* in IR. It is supposed to be mangled as PUAS5i but it is mangled as Pi instead. This patch attempts to represent OpenCL private address space explicitly in AST. It adds a new enum LangAS::opencl_private and adds it to the variable types which are implicitly private: automatic variables without address space qualifier function parameter pointee type without address space qualifier (OpenCL 1.2 and below) Differential Revision: https://reviews.llvm.org/D35082 Removed: cfe/trunk/test/SemaOpenCL/extern.cl Modified: cfe/trunk/include/clang/Basic/AddressSpaces.h cfe/trunk/lib/AST/ASTContext.cpp cfe/trunk/lib/AST/Expr.cpp cfe/trunk/lib/AST/ItaniumMangle.cpp cfe/trunk/lib/AST/TypePrinter.cpp cfe/trunk/lib/Basic/Targets/AMDGPU.cpp cfe/trunk/lib/Basic/Targets/NVPTX.h cfe/trunk/lib/Basic/Targets/SPIR.h cfe/trunk/lib/Basic/Targets/TCE.h cfe/trunk/lib/CodeGen/CGDecl.cpp cfe/trunk/lib/Sema/SemaChecking.cpp cfe/trunk/lib/Sema/SemaDecl.cpp cfe/trunk/lib/Sema/SemaType.cpp cfe/trunk/test/CodeGenOpenCL/address-spaces-mangling.cl cfe/trunk/test/CodeGenOpenCL/address-spaces.cl cfe/trunk/test/SemaOpenCL/address-spaces.cl cfe/trunk/test/SemaOpenCL/cl20-device-side-enqueue.cl cfe/trunk/test/SemaOpenCL/storageclass-cl20.cl cfe/trunk/test/SemaOpenCL/storageclass.cl cfe/trunk/test/SemaTemplate/address_space-dependent.cpp Modified: cfe/trunk/include/clang/Basic/AddressSpaces.h URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/AddressSpaces.h?rev=315668&r1=315667&r2=315668&view=diff == --- cfe/trunk/include/clang/Basic/AddressSpaces.h (original) +++ cfe/trunk/include/clang/Basic/AddressSpaces.h Thu Oct 12 20:37:48 2017 @@ -25,16 +25,17 @@ namespace LangAS { /// enum ID { // The default value 0 is the value used in QualType for the the situation - // where there is no address space qualifier. For most languages, this also - // corresponds to the situation where there is no address space qualifier in - // the source code, except for OpenCL, where the address space value 0 in - // QualType represents private address space in OpenCL source code. + // where there is no address space qualifier. Default = 0, // OpenCL specific address spaces. + // In OpenCL each l-value must have certain non-default address space, each + // r-value must have no address space (i.e. the default address space). The + // pointee of a pointer must have non-default address space. opencl_global, opencl_local, opencl_constant, + opencl_private, opencl_generic, // CUDA specific address spaces. Modified: cfe/trunk/lib/AST/ASTContext.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ASTContext.cpp?rev=315668&r1=315667&r2=315668&view=diff == --- cfe/trunk/lib/AST/ASTContext.cpp (original) +++ cfe/trunk/lib/AST/ASTContext.cpp Thu Oct 12 20:37:48 2017 @@ -707,6 +707,7 @@ static const LangAS::Map *getAddressSpac 1, // opencl_global 3, // opencl_local 2, // opencl_constant + 0, // opencl_private 4, // opencl_generic 5, // cuda_device 6, // cuda_constant Modified: cfe/trunk/lib/AST/Expr.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/Expr.cpp?rev=315668&r1=315667&r2=315668&view=diff == --- cfe/trunk/lib/AST/Expr.cpp (original) +++ cfe/trunk/lib/AST/Expr.cpp Thu Oct 12 20:37:48 2017 @@ -3293,20 +3293,20 @@ Expr::isNullPointerConstant(ASTContext & // Check that it is a cast to void*. 
if (const PointerType *PT = CE->getType()->getAs()) { QualType Pointee = PT->getPointeeType(); -Qualifiers Q = Pointee.getQualifiers(); -// In OpenCL v2.0 generic address space acts as a placeholder -// and should be ignored. -bool IsASValid = true; -if (Ctx.getLangOpts().OpenCLVersion >= 200) { - if (Pointee.getAddressSpace() == LangAS::opencl_generic) -Q.removeAddressSpace(); - else -IsASValid = false; -} +// Only (void*)0 or equivalent are treated as nullptr. If pointee type +// has non-default address space it is not treated as nullptr. +// (__generic void*)0 in OpenCL 2.0 should not
r315678 - Fix regression of test/CodeGenOpenCL/address-spaces.cl on ppc
Author: yaxunl Date: Fri Oct 13 06:53:06 2017 New Revision: 315678 URL: http://llvm.org/viewvc/llvm-project?rev=315678&view=rev Log: Fix regression of test/CodeGenOpenCL/address-spaces.cl on ppc Modified: cfe/trunk/test/CodeGenOpenCL/address-spaces.cl Modified: cfe/trunk/test/CodeGenOpenCL/address-spaces.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/address-spaces.cl?rev=315678&r1=315677&r2=315678&view=diff == --- cfe/trunk/test/CodeGenOpenCL/address-spaces.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/address-spaces.cl Fri Oct 13 06:53:06 2017 @@ -102,7 +102,8 @@ void test_struct() { // SPIR-LABEL: define void @test_void_par() void test_void_par(void) {} -// SPIR-LABEL: define i32 @test_func_return_type() +// On ppc64 returns signext i32. +// SPIR-LABEL: define{{.*}} i32 @test_func_return_type() int test_func_return_type(void) { return 0; } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r315804 - [OpenCL] Emit enqueued block as kernel
Author: yaxunl Date: Sat Oct 14 05:23:50 2017 New Revision: 315804 URL: http://llvm.org/viewvc/llvm-project?rev=315804&view=rev Log: [OpenCL] Emit enqueued block as kernel In OpenCL the kernel function and non-kernel function has different calling conventions. For certain targets they have different argument ABIs. Also kernels have special function attributes and metadata for runtime to launch them. The blocks passed to enqueue_kernel is supposed to be executed as kernels. As such, the block invoke function should be emitted as kernel with proper calling convention and argument ABI. This patch emits enqueued block as kernel. If a block is both called directly and passed to enqueue_kernel, separate functions will be generated. Differential Revision: https://reviews.llvm.org/D38134 Added: cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl Modified: cfe/trunk/lib/CodeGen/CGBlocks.cpp cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h cfe/trunk/lib/CodeGen/CodeGenFunction.h cfe/trunk/lib/CodeGen/CodeGenTypes.h cfe/trunk/lib/CodeGen/TargetInfo.cpp cfe/trunk/lib/CodeGen/TargetInfo.h cfe/trunk/test/CodeGenOpenCL/blocks.cl cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl Modified: cfe/trunk/lib/CodeGen/CGBlocks.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBlocks.cpp?rev=315804&r1=315803&r2=315804&view=diff == --- cfe/trunk/lib/CodeGen/CGBlocks.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBlocks.cpp Sat Oct 14 05:23:50 2017 @@ -738,16 +738,27 @@ void CodeGenFunction::destroyBlockInfos( } /// Emit a block literal expression in the current function. -llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) { +llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr, + llvm::Function **InvokeF) { // If the block has no captures, we won't have a pre-computed // layout for it. 
if (!blockExpr->getBlockDecl()->hasCaptures()) { -if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) +// The block literal is emitted as a global variable, and the block invoke +// function has to be extracted from its initializer. +if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) { + if (InvokeF) { +auto *GV = cast( +cast(Block)->stripPointerCasts()); +auto *BlockInit = cast(GV->getInitializer()); +*InvokeF = cast( +BlockInit->getAggregateElement(2)->stripPointerCasts()); + } return Block; +} CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName()); computeBlockInfo(CGM, this, blockInfo); blockInfo.BlockExpression = blockExpr; -return EmitBlockLiteral(blockInfo); +return EmitBlockLiteral(blockInfo, InvokeF); } // Find the block info for this block and take ownership of it. @@ -756,10 +767,11 @@ llvm::Value *CodeGenFunction::EmitBlockL blockExpr->getBlockDecl())); blockInfo->BlockExpression = blockExpr; - return EmitBlockLiteral(*blockInfo); + return EmitBlockLiteral(*blockInfo, InvokeF); } -llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { +llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo, + llvm::Function **InvokeF) { bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL; auto GenVoidPtrTy = IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy; @@ -768,9 +780,11 @@ llvm::Value *CodeGenFunction::EmitBlockL CGM.getTarget().getPointerWidth(GenVoidPtrAddr) / 8); // Using the computed layout, generate the actual block function. 
bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda(); - llvm::Constant *blockFn = CodeGenFunction(CGM, true).GenerateBlockFunction( + auto *InvokeFn = CodeGenFunction(CGM, true).GenerateBlockFunction( CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal); - blockFn = llvm::ConstantExpr::getPointerCast(blockFn, GenVoidPtrTy); + if (InvokeF) +*InvokeF = InvokeFn; + auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy); // If there is nothing to capture, we can emit this as a global block. if (blockInfo.CanBeGlobal) Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=315804&r1=315803&r2=315804&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sat Oct 14 05:23:50 2017 @@ -2779,12 +2779,17 @@ RValue CodeGenFunction::EmitBuiltinExpr( // The most basic form
r315805 - Fix build failure on android due to missing std::to_string()
Author: yaxunl Date: Sat Oct 14 05:51:52 2017 New Revision: 315805 URL: http://llvm.org/viewvc/llvm-project?rev=315805&view=rev Log: Fix build failure on android due to missing std::to_string() Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=315805&r1=315804&r2=315805&view=diff == --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original) +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Sat Oct 14 05:51:52 2017 @@ -25,6 +25,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" @@ -8994,7 +8995,7 @@ llvm::Function *AMDGPUTargetCodeGenInfo: ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*")); ArgTypeQuals.push_back(llvm::MDString::get(C, "")); ArgNames.push_back( -llvm::MDString::get(C, std::string("local_arg") + std::to_string(I))); +llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str())); } std::string Name = Invoke->getName().str() + "_kernel"; auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false); ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r316000 - CodeGen: Fix invalid bitcasts for atomic builtins
Author: yaxunl Date: Tue Oct 17 07:19:29 2017 New Revision: 316000 URL: http://llvm.org/viewvc/llvm-project?rev=316000&view=rev Log: CodeGen: Fix invalid bitcasts for atomic builtins Currently clang assumes the temporary variables emitted during codegen of atomic builtins have address space 0, which is not true for target triple amdgcn---amdgiz and causes invalid bitcasts. This patch fixes that. Differential Revision: https://reviews.llvm.org/D38966 Modified: cfe/trunk/lib/CodeGen/CGAtomic.cpp cfe/trunk/test/CodeGenOpenCL/atomic-ops.cl Modified: cfe/trunk/lib/CodeGen/CGAtomic.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGAtomic.cpp?rev=316000&r1=315999&r2=316000&view=diff == --- cfe/trunk/lib/CodeGen/CGAtomic.cpp (original) +++ cfe/trunk/lib/CodeGen/CGAtomic.cpp Tue Oct 17 07:19:29 2017 @@ -1226,7 +1226,8 @@ RValue CodeGenFunction::EmitAtomicExpr(A return RValue::get(nullptr); return convertTempToRValue( -Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()), +Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo( +Dest.getAddressSpace())), RValTy, E->getExprLoc()); } @@ -1298,7 +1299,8 @@ RValue CodeGenFunction::EmitAtomicExpr(A assert(Atomics.getValueSizeInBits() <= Atomics.getAtomicSizeInBits()); return convertTempToRValue( - Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()), + Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo( + Dest.getAddressSpace())), RValTy, E->getExprLoc()); } Modified: cfe/trunk/test/CodeGenOpenCL/atomic-ops.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/atomic-ops.cl?rev=316000&r1=315999&r2=316000&view=diff == --- cfe/trunk/test/CodeGenOpenCL/atomic-ops.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/atomic-ops.cl Tue Oct 17 07:19:29 2017 @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-opencl | opt -instnamer -S | FileCheck %s +// RUN: %clang_cc1 %s -cl-std=CL2.0 
-emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-amdgizcl | opt -instnamer -S | FileCheck %s // Also test serialization of atomic operations here, to avoid duplicating the test. -// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-opencl -// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-opencl -emit-llvm -o - | opt -instnamer -S | FileCheck %s +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-amdgizcl +// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | opt -instnamer -S | FileCheck %s #ifndef ALREADY_INCLUDED #define ALREADY_INCLUDED @@ -32,22 +32,22 @@ atomic_int j; void fi1(atomic_int *i) { // CHECK-LABEL: @fi1 - // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group); - // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst + // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device); - // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} seq_cst + // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices); - // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst + // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group); } void fi2(atomic_int *i) { // CHECK-LABEL: @fi2 - // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} 
syncscope("workgroup") seq_cst __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group); } @@ -56,7 +56,7 @@ void test_addr(global atomic_int *ig, pr // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(1)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst __opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group); - // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(5)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst __opencl_atomic
r316165 - [AMDGPU] Fix bug in enqueued block codegen due to an extra line
Author: yaxunl Date: Thu Oct 19 08:56:13 2017 New Revision: 316165 URL: http://llvm.org/viewvc/llvm-project?rev=316165&view=rev Log: [AMDGPU] Fix bug in enqueued block codegen due to an extra line Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=316165&r1=316164&r2=316165&view=diff == --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original) +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Thu Oct 19 08:56:13 2017 @@ -8985,7 +8985,6 @@ llvm::Function *AMDGPUTargetCodeGenInfo: ArgNames.push_back(llvm::MDString::get(C, "block_literal")); for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) { ArgTys.push_back(InvokeFT->getParamType(I)); -ArgTys.push_back(BlockTy); ArgTypeNames.push_back(llvm::MDString::get(C, "void*")); AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3))); AccessQuals.push_back(llvm::MDString::get(C, "none")); Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl?rev=316165&r1=316164&r2=316165&view=diff == --- cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl Thu Oct 19 08:56:13 2017 @@ -18,6 +18,12 @@ kernel void test(global char *a, char b, a[0] = b; c[0] = d; }); + enqueue_kernel(default_queue, flags, ndrange, + ^(local void *lp) { + a[0] = b; + c[0] = d; + ((local int*)lp)[0] = 1; + }, 100); } // CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>) @@ -33,4 +39,7 @@ kernel void test(global char *a, char b, // CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>) // CHECK-SAME: #[[ATTR]] 
!kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}} +// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*) +// CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}} + // CHECK: attributes #[[ATTR]] = { nounwind "enqueued-block" } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r316353 - CodeGen: Fix invalid bitcast in partial initialization of automatic array variable
Author: yaxunl Date: Mon Oct 23 10:49:26 2017 New Revision: 316353 URL: http://llvm.org/viewvc/llvm-project?rev=316353&view=rev Log: CodeGen: Fix invalid bitcast in partial initialization of automatic array variable Differential Revision: https://reviews.llvm.org/D39184 Modified: cfe/trunk/lib/CodeGen/CGDecl.cpp cfe/trunk/test/CodeGenOpenCL/amdgcn-automatic-variable.cl Modified: cfe/trunk/lib/CodeGen/CGDecl.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGDecl.cpp?rev=316353&r1=316352&r2=316353&view=diff == --- cfe/trunk/lib/CodeGen/CGDecl.cpp (original) +++ cfe/trunk/lib/CodeGen/CGDecl.cpp Mon Oct 23 10:49:26 2017 @@ -1266,7 +1266,7 @@ void CodeGenFunction::EmitAutoVarInit(co llvm::ConstantInt::get(IntPtrTy, getContext().getTypeSizeInChars(type).getQuantity()); - llvm::Type *BP = Int8PtrTy; + llvm::Type *BP = AllocaInt8PtrTy; if (Loc.getType() != BP) Loc = Builder.CreateBitCast(Loc, BP); Modified: cfe/trunk/test/CodeGenOpenCL/amdgcn-automatic-variable.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgcn-automatic-variable.cl?rev=316353&r1=316352&r2=316353&view=diff == --- cfe/trunk/test/CodeGenOpenCL/amdgcn-automatic-variable.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/amdgcn-automatic-variable.cl Mon Oct 23 10:49:26 2017 @@ -58,3 +58,11 @@ void func2(void) { const int lvc = 4; lv1 = lvc; } + +// CHECK-LABEL: define void @func3() +// CHECK: %a = alloca [16 x [1 x float]], align 4, addrspace(5) +// CHECK: %[[CAST:.+]] = bitcast [16 x [1 x float]] addrspace(5)* %a to i8 addrspace(5)* +// CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* %[[CAST]], i8 0, i64 64, i32 4, i1 false) +void func3(void) { + float a[16][1] = {{0.}}; +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r316484 - CodeGen: Fix missing debug loc due to alloca
Author: yaxunl Date: Tue Oct 24 12:14:43 2017 New Revision: 316484 URL: http://llvm.org/viewvc/llvm-project?rev=316484&view=rev Log: CodeGen: Fix missing debug loc due to alloca Builder save/restores insertion pointer when emitting addr space cast for alloca, but does not save/restore debug loc, which causes verifier failure for certain call instructions. This patch fixes that. Differential Revision: https://reviews.llvm.org/D39069 Added: cfe/trunk/test/CodeGenOpenCL/func-call-dbg-loc.cl Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=316484&r1=316483&r2=316484&view=diff == --- cfe/trunk/lib/CodeGen/CGExpr.cpp (original) +++ cfe/trunk/lib/CodeGen/CGExpr.cpp Tue Oct 24 12:14:43 2017 @@ -74,12 +74,11 @@ Address CodeGenFunction::CreateTempAlloc // cast alloca to the default address space when necessary. if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) { auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default); -auto CurIP = Builder.saveIP(); +llvm::IRBuilderBase::InsertPointGuard IPG(Builder); Builder.SetInsertPoint(AllocaInsertPt); V = getTargetHooks().performAddrSpaceCast( *this, V, getASTAllocaAddressSpace(), LangAS::Default, Ty->getPointerTo(DestAddrSpace), /*non-null*/ true); -Builder.restoreIP(CurIP); } return Address(V, Align); Added: cfe/trunk/test/CodeGenOpenCL/func-call-dbg-loc.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/func-call-dbg-loc.cl?rev=316484&view=auto == --- cfe/trunk/test/CodeGenOpenCL/func-call-dbg-loc.cl (added) +++ cfe/trunk/test/CodeGenOpenCL/func-call-dbg-loc.cl Tue Oct 24 12:14:43 2017 @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple amdgcn---amdgizcl -debug-info-kind=limited -O0 -emit-llvm -o - %s | FileCheck %s + +typedef struct +{ +int a; +} Struct; + +Struct func1(); + +void func2(Struct S); + +void func3() +{ +// CHECK: call i32 @func1() #{{[0-9]+}}, 
!dbg ![[LOC:[0-9]+]] +// CHECK: call void @func2(i32 %{{[0-9]+}}) #{{[0-9]+}}, !dbg ![[LOC]] +func2(func1()); +} + ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r340873 - [HIP] Fix output file extension
Author: yaxunl Date: Tue Aug 28 14:09:09 2018 New Revision: 340873 URL: http://llvm.org/viewvc/llvm-project?rev=340873&view=rev Log: [HIP] Fix output file extension OffloadBundlingJobAction constructor accepts a list of JobAction as inputs. The host JobAction is the last one. The file type of OffloadBundlingJobAction should be determined by the host JobAction (the last one) instead of the first one. Since HIP emits LLVM bitcode for device compilation, device JobAction has different file type as host Job Action. This bug causes incorrect output file extension for HIP. This patch fixes it by using the last input JobAction (host JobAction) to determine file type of OffloadBundlingJobAction. Differential Revision: https://reviews.llvm.org/D51336 Added: cfe/trunk/test/Driver/hip-output-file-name.hip Modified: cfe/trunk/lib/Driver/Action.cpp Modified: cfe/trunk/lib/Driver/Action.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Action.cpp?rev=340873&r1=340872&r2=340873&view=diff == --- cfe/trunk/lib/Driver/Action.cpp (original) +++ cfe/trunk/lib/Driver/Action.cpp Tue Aug 28 14:09:09 2018 @@ -382,7 +382,7 @@ VerifyPCHJobAction::VerifyPCHJobAction(A void OffloadBundlingJobAction::anchor() {} OffloadBundlingJobAction::OffloadBundlingJobAction(ActionList &Inputs) -: JobAction(OffloadBundlingJobClass, Inputs, Inputs.front()->getType()) {} +: JobAction(OffloadBundlingJobClass, Inputs, Inputs.back()->getType()) {} void OffloadUnbundlingJobAction::anchor() {} Added: cfe/trunk/test/Driver/hip-output-file-name.hip URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-output-file-name.hip?rev=340873&view=auto == --- cfe/trunk/test/Driver/hip-output-file-name.hip (added) +++ cfe/trunk/test/Driver/hip-output-file-name.hip Tue Aug 28 14:09:09 2018 @@ -0,0 +1,9 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -c -target x86_64-linux-gnu \ +// RUN: -x hip --cuda-gpu-arch=gfx803 
--cuda-gpu-arch=gfx900 %s \ +// RUN: 2>&1 | FileCheck %s + +// CHECK: {{.*}}clang-offload-bundler{{.*}}"-outputs=hip-output-file-name.o" ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r340967 - Add predefined macro __gnu_linux__ for proper aux-triple
Author: yaxunl Date: Wed Aug 29 13:39:22 2018 New Revision: 340967 URL: http://llvm.org/viewvc/llvm-project?rev=340967&view=rev Log: Add predefined macro __gnu_linux__ for proper aux-triple Clang predefines the macro __linux__ for an aux-triple with Linux OS but does not predefine the macro __gnu_linux__. This causes compilation errors for certain applications, e.g. Eigen. This patch fixes that. Differential Revision: https://reviews.llvm.org/D51441 Modified: cfe/trunk/lib/Frontend/InitPreprocessor.cpp cfe/trunk/test/Preprocessor/predefined-macros.c Modified: cfe/trunk/lib/Frontend/InitPreprocessor.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/InitPreprocessor.cpp?rev=340967&r1=340966&r2=340967&view=diff == --- cfe/trunk/lib/Frontend/InitPreprocessor.cpp (original) +++ cfe/trunk/lib/Frontend/InitPreprocessor.cpp Wed Aug 29 13:39:22 2018 @@ -1128,6 +1128,7 @@ static void InitializePredefinedAuxMacro if (AuxTriple.getOS() == llvm::Triple::Linux) { Builder.defineMacro("__ELF__"); Builder.defineMacro("__linux__"); +Builder.defineMacro("__gnu_linux__"); // Used in features.h. If this is omitted, math.h doesn't declare float // versions of the functions in bits/mathcalls.h. 
if (LangOpts.CPlusPlus) Modified: cfe/trunk/test/Preprocessor/predefined-macros.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-macros.c?rev=340967&r1=340966&r2=340967&view=diff == --- cfe/trunk/test/Preprocessor/predefined-macros.c (original) +++ cfe/trunk/test/Preprocessor/predefined-macros.c Wed Aug 29 13:39:22 2018 @@ -183,9 +183,11 @@ // CHECK-HIP: #define __HIP__ 1 // RUN: %clang_cc1 %s -E -dM -o - -x hip -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device \ +// RUN: -aux-triple x86_64-unknown-linux -fcuda-is-device \ // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-HIP-DEV // CHECK-HIP-DEV-NOT: #define __CUDA_ARCH__ // CHECK-HIP-DEV: #define __HIPCC__ 1 // CHECK-HIP-DEV: #define __HIP_DEVICE_COMPILE__ 1 // CHECK-HIP-DEV: #define __HIP__ 1 +// CHECK_HIP-DEV: #define __linux__ 1 +// CHECK_HIP-DEV: #define __gnu_linux__ 1 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r341077 - [HIP] Add -fvisibility hidden option to clang
Author: yaxunl Date: Thu Aug 30 08:10:20 2018 New Revision: 341077 URL: http://llvm.org/viewvc/llvm-project?rev=341077&view=rev Log: [HIP] Add -fvisibility hidden option to clang AMDGPU target need -fvisibility hidden option for clang to work around a limitation of no PLT support, otherwise there is compilation error at -O0. Differential Revision: https://reviews.llvm.org/D51434 Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp cfe/trunk/test/Driver/hip-toolchain.hip Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/HIP.cpp?rev=341077&r1=341076&r2=341077&view=diff == --- cfe/trunk/lib/Driver/ToolChains/HIP.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/HIP.cpp Thu Aug 30 08:10:20 2018 @@ -247,6 +247,12 @@ void HIPToolChain::addClangTargetOptions if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc, false)) CC1Args.push_back("-fcuda-rdc"); + + // Default to "hidden" visibility, as object level linking will not be + // supported for the foreseeable future. 
+ if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, + options::OPT_fvisibility_ms_compat)) +CC1Args.append({"-fvisibility", "hidden"}); } llvm::opt::DerivedArgList * Modified: cfe/trunk/test/Driver/hip-toolchain.hip URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-toolchain.hip?rev=341077&r1=341076&r2=341077&view=diff == --- cfe/trunk/test/Driver/hip-toolchain.hip (original) +++ cfe/trunk/test/Driver/hip-toolchain.hip Thu Aug 30 08:10:20 2018 @@ -15,13 +15,15 @@ // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803" -// CHECK-SAME: "-fcuda-is-device" {{.*}} "-o" [[A_BC:".*bc"]] "-x" "hip" +// CHECK-SAME: "-fcuda-is-device" "-fvisibility" "hidden" +// CHECK-SAME: {{.*}} "-o" [[A_BC:".*bc"]] "-x" "hip" // CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]] // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803" -// CHECK-SAME: "-fcuda-is-device" {{.*}} "-o" [[B_BC:".*bc"]] "-x" "hip" +// CHECK-SAME: "-fcuda-is-device" "-fvisibility" "hidden" +// CHECK-SAME: {{.*}} "-o" [[B_BC:".*bc"]] "-x" "hip" // CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]] // CHECK: [[LLVM_LINK:"*.llvm-link"]] [[A_BC]] [[B_BC]] ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r337540 - Sema: Fix explicit address space cast in C++
Author: yaxunl Date: Fri Jul 20 04:32:51 2018 New Revision: 337540 URL: http://llvm.org/viewvc/llvm-project?rev=337540&view=rev Log: Sema: Fix explicit address space cast in C++ Currently clang does not allow an implicit cast of a pointer to a pointer type in a different address space but allows a C-style cast of a pointer to a pointer type in a different address space. However, there is a bug in Sema causing an incorrect Cast Expr in the AST for the latter case, which in turn results in invalid LLVM IR in codegen. This is because Sema::IsQualificationConversion returns true for a cast of a pointer to a pointer type in a different address space, which in turn allows a standard conversion and results in a cast expression with no op in the AST. This patch fixes that by letting Sema::IsQualificationConversion return false for a cast of a pointer to a pointer type in a different address space, which in turn disallows standard conversion, implicit cast, and static cast. Finally it results in a reinterpret cast and the correct conversion kind is set. 
Differential Revision: https://reviews.llvm.org/D49294 Added: cfe/trunk/test/CodeGenCXX/address-space-cast.cpp Modified: cfe/trunk/lib/Sema/SemaCast.cpp cfe/trunk/lib/Sema/SemaOverload.cpp Modified: cfe/trunk/lib/Sema/SemaCast.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaCast.cpp?rev=337540&r1=337539&r2=337540&view=diff == --- cfe/trunk/lib/Sema/SemaCast.cpp (original) +++ cfe/trunk/lib/Sema/SemaCast.cpp Fri Jul 20 04:32:51 2018 @@ -1955,6 +1955,12 @@ static bool fixOverloadedReinterpretCast return Result.isUsable(); } +static bool IsAddressSpaceConversion(QualType SrcType, QualType DestType) { + return SrcType->isPointerType() && DestType->isPointerType() && + SrcType->getAs()->getPointeeType().getAddressSpace() != + DestType->getAs()->getPointeeType().getAddressSpace(); +} + static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr, QualType DestType, bool CStyle, SourceRange OpRange, @@ -2198,6 +2204,8 @@ static TryCastResult TryReinterpretCast( } else { Kind = CK_BitCast; } + } else if (IsAddressSpaceConversion(SrcType, DestType)) { +Kind = CK_AddressSpaceConversion; } else { Kind = CK_BitCast; } Modified: cfe/trunk/lib/Sema/SemaOverload.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaOverload.cpp?rev=337540&r1=337539&r2=337540&view=diff == --- cfe/trunk/lib/Sema/SemaOverload.cpp (original) +++ cfe/trunk/lib/Sema/SemaOverload.cpp Fri Jul 20 04:32:51 2018 @@ -3150,6 +3150,15 @@ Sema::IsQualificationConversion(QualType = PreviousToQualsIncludeConst && ToQuals.hasConst(); } + // Allows address space promotion by language rules implemented in + // Type::Qualifiers::isAddressSpaceSupersetOf. 
+ Qualifiers FromQuals = FromType.getQualifiers(); + Qualifiers ToQuals = ToType.getQualifiers(); + if (!ToQuals.isAddressSpaceSupersetOf(FromQuals) && + !FromQuals.isAddressSpaceSupersetOf(ToQuals)) { +return false; + } + // We are left with FromType and ToType being the pointee types // after unwrapping the original FromType and ToType the same number // of types. If we unwrapped any pointers, and if FromType and Added: cfe/trunk/test/CodeGenCXX/address-space-cast.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/address-space-cast.cpp?rev=337540&view=auto == --- cfe/trunk/test/CodeGenCXX/address-space-cast.cpp (added) +++ cfe/trunk/test/CodeGenCXX/address-space-cast.cpp Fri Jul 20 04:32:51 2018 @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 %s -triple=amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s + +#define __private__ __attribute__((address_space(5))) + +void func_pchar(__private__ char *x); + +void test_cast(char *gen_ptr) { + // CHECK: %[[cast:.*]] = addrspacecast i8* %{{.*}} to i8 addrspace(5)* + // CHECK-NEXT: store i8 addrspace(5)* %[[cast]] + __private__ char *priv_ptr = (__private__ char *)gen_ptr; + + // CHECK: %[[cast:.*]] = addrspacecast i8* %{{.*}} to i8 addrspace(5)* + // CHECK-NEXT: call void @_Z10func_pcharPU3AS5c(i8 addrspace(5)* %[[cast]]) + func_pchar((__private__ char *)gen_ptr); +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r337631 - [HIP] Register/unregister device fat binary only once
Author: yaxunl Date: Fri Jul 20 15:45:24 2018 New Revision: 337631 URL: http://llvm.org/viewvc/llvm-project?rev=337631&view=rev Log: [HIP] Register/unregister device fat binary only once HIP generates one fat binary for all devices after linking. However, for each compilation unit a ctor function is emitted which registers the same fat binary. Measures need to be taken to make sure the fat binary is only registered once. Currently each ctor function calls __hipRegisterFatBinary and stores the returned value to __hip_gpubin_handle. This patch changes the linkage of __hip_gpubin_handle to be linkonce so that it is shared between LLVM modules. Then this patch adds a check of the value of __hip_gpubin_handle to make sure __hipRegisterFatBinary is only called once. The code is equivalent to void *__hip_gpubin_handle; void ctor() { if (__hip_gpubin_handle == 0) { __hip_gpubin_handle = __hipRegisterFatBinary(...); } // register kernels and variables. } The patch also makes a similar change to dtors so that __hipUnregisterFatBinary is called only once. 
Differential Revision: https://reviews.llvm.org/D49083 Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp cfe/trunk/test/CodeGenCUDA/device-stub.cu Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=337631&r1=337630&r2=337631&view=diff == --- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Fri Jul 20 15:45:24 2018 @@ -309,12 +309,24 @@ llvm::Function *CGNVCUDARuntime::makeReg } /// Creates a global constructor function for the module: +/// +/// For CUDA: /// \code /// void __cuda_module_ctor(void*) { /// Handle = __cudaRegisterFatBinary(GpuBinaryBlob); /// __cuda_register_globals(Handle); /// } /// \endcode +/// +/// For HIP: +/// \code +/// void __hip_module_ctor(void*) { +/// if (__hip_gpubin_handle == 0) { +/// __hip_gpubin_handle = __hipRegisterFatBinary(GpuBinaryBlob); +/// __hip_register_globals(__hip_gpubin_handle); +/// } +/// } +/// \endcode llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { bool IsHIP = CGM.getLangOpts().HIP; // No need to generate ctors/dtors if there is no GPU binary. @@ -427,22 +439,68 @@ llvm::Function *CGNVCUDARuntime::makeMod /*constant*/ true); FatbinWrapper->setSection(FatbinSectionName); - // Register binary with CUDA/HIP runtime. This is substantially different in - // default mode vs. separate compilation! - if (!RelocatableDeviceCode) { -// GpuBinaryHandle = __{cuda|hip}RegisterFatBinary(&FatbinWrapper); + // There is only one HIP fat binary per linked module, however there are + // multiple constructor functions. Make sure the fat binary is registered + // only once. The constructor functions are executed by the dynamic loader + // before the program gains control. The dynamic loader cannot execute the + // constructor functions concurrently since doing that would not guarantee + // thread safety of the loaded program. Therefore we can assume sequential + // execution of constructor functions here. 
+ if (IsHIP) { +llvm::BasicBlock *IfBlock = +llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc); +llvm::BasicBlock *ExitBlock = +llvm::BasicBlock::Create(Context, "exit", ModuleCtorFunc); +// The name, size, and initialization pattern of this variable is part +// of HIP ABI. +GpuBinaryHandle = new llvm::GlobalVariable( +TheModule, VoidPtrPtrTy, /*isConstant=*/false, +llvm::GlobalValue::LinkOnceAnyLinkage, +/*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy), +"__hip_gpubin_handle"); +GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity()); +Address GpuBinaryAddr( +GpuBinaryHandle, +CharUnits::fromQuantity(GpuBinaryHandle->getAlignment())); +{ + auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr); + llvm::Constant *Zero = + llvm::Constant::getNullValue(HandleValue->getType()); + llvm::Value *EQZero = CtorBuilder.CreateICmpEQ(HandleValue, Zero); + CtorBuilder.CreateCondBr(EQZero, IfBlock, ExitBlock); +} +{ + CtorBuilder.SetInsertPoint(IfBlock); + // GpuBinaryHandle = __hipRegisterFatBinary(&FatbinWrapper); + llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall( + RegisterFatbinFunc, + CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy)); + CtorBuilder.CreateStore(RegisterFatbinCall, GpuBinaryAddr); + CtorBuilder.CreateBr(ExitBlock); +} +{ + CtorBuilder.SetInsertPoint(ExitBlock); + // Call __hip_register_globals(GpuBinaryHandle); + if (RegisterGlobalsFunc) { +auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr); +CtorBuilder.CreateCall(RegisterGlobalsFunc, HandleValue); + } +} + } else if (!Relocat
r337639 - [HIP] Support -fcuda-flush-denormals-to-zero for amdgcn
Author: yaxunl Date: Fri Jul 20 19:02:22 2018 New Revision: 337639 URL: http://llvm.org/viewvc/llvm-project?rev=337639&view=rev Log: [HIP] Support -fcuda-flush-denormals-to-zero for amdgcn Differential Revision: https://reviews.llvm.org/D48287 Modified: cfe/trunk/include/clang/Basic/LangOptions.def cfe/trunk/lib/CodeGen/CGCall.cpp cfe/trunk/lib/CodeGen/CodeGenModule.cpp cfe/trunk/lib/Frontend/CompilerInvocation.cpp cfe/trunk/test/CodeGenCUDA/flush-denormals.cu Modified: cfe/trunk/include/clang/Basic/LangOptions.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/LangOptions.def?rev=337639&r1=337638&r2=337639&view=diff == --- cfe/trunk/include/clang/Basic/LangOptions.def (original) +++ cfe/trunk/include/clang/Basic/LangOptions.def Fri Jul 20 19:02:22 2018 @@ -209,7 +209,6 @@ LANGOPT(RenderScript , 1, 0, "Rende LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device") LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "allowing variadic functions in CUDA device code") LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr functions as __host__ __device__") -LANGOPT(CUDADeviceFlushDenormalsToZero, 1, 0, "flushing denormals to zero") LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions") LANGOPT(CUDARelocatableDeviceCode, 1, 0, "generate relocatable device code") Modified: cfe/trunk/lib/CodeGen/CGCall.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=337639&r1=337638&r2=337639&view=diff == --- cfe/trunk/lib/CodeGen/CGCall.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCall.cpp Fri Jul 20 19:02:22 2018 @@ -1800,7 +1800,7 @@ void CodeGenModule::ConstructDefaultFnAt FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); // Respect -fcuda-flush-denormals-to-zero. 
-if (getLangOpts().CUDADeviceFlushDenormalsToZero) +if (CodeGenOpts.FlushDenorm) FuncAttrs.addAttribute("nvptx-f32ftz", "true"); } } Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=337639&r1=337638&r2=337639&view=diff == --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original) +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Fri Jul 20 19:02:22 2018 @@ -526,7 +526,7 @@ void CodeGenModule::Release() { // floating point values to 0. (This corresponds to its "__CUDA_FTZ" // property.) getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz", - LangOpts.CUDADeviceFlushDenormalsToZero ? 1 : 0); + CodeGenOpts.FlushDenorm ? 1 : 0); } // Emit OpenCL specific module metadata: OpenCL/SPIR version. Modified: cfe/trunk/lib/Frontend/CompilerInvocation.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/CompilerInvocation.cpp?rev=337639&r1=337638&r2=337639&view=diff == --- cfe/trunk/lib/Frontend/CompilerInvocation.cpp (original) +++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp Fri Jul 20 19:02:22 2018 @@ -690,7 +690,9 @@ static bool ParseCodeGenArgs(CodeGenOpti Args.hasArg(OPT_cl_unsafe_math_optimizations) || Args.hasArg(OPT_cl_fast_relaxed_math)); Opts.Reassociate = Args.hasArg(OPT_mreassociate); - Opts.FlushDenorm = Args.hasArg(OPT_cl_denorms_are_zero); + Opts.FlushDenorm = Args.hasArg(OPT_cl_denorms_are_zero) || + (Args.hasArg(OPT_fcuda_is_device) && + Args.hasArg(OPT_fcuda_flush_denormals_to_zero)); Opts.CorrectlyRoundedDivSqrt = Args.hasArg(OPT_cl_fp32_correctly_rounded_divide_sqrt); Opts.UniformWGSize = @@ -2191,9 +2193,6 @@ static void ParseLangArgs(LangOptions &O if (Args.hasArg(OPT_fno_cuda_host_device_constexpr)) Opts.CUDAHostDeviceConstexpr = 0; - if (Opts.CUDAIsDevice && Args.hasArg(OPT_fcuda_flush_denormals_to_zero)) -Opts.CUDADeviceFlushDenormalsToZero = 1; - if (Opts.CUDAIsDevice && Args.hasArg(OPT_fcuda_approx_transcendentals)) 
Opts.CUDADeviceApproxTranscendentals = 1; Modified: cfe/trunk/test/CodeGenCUDA/flush-denormals.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/flush-denormals.cu?rev=337639&r1=337638&r2=337639&view=diff == --- cfe/trunk/test/CodeGenCUDA/flush-denormals.cu (original) +++ cfe/trunk/test/CodeGenCUDA/flush-denormals.cu Fri Jul 20 19:02:22 2018 @@ -5,6 +5,13 @@ // RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \ // RUN: FileCheck %s -check-prefix CHECK -check-prefix FTZ +// RUN: %clang_cc1 -fcuda-is-device -x hip \ +// RUN: -triple
r337791 - Enable .hip files for test/Driver
Author: yaxunl Date: Mon Jul 23 18:03:44 2018 New Revision: 337791 URL: http://llvm.org/viewvc/llvm-project?rev=337791&view=rev Log: Enable .hip files for test/Driver Partially revert r334128 due to regressions. Modified: cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/test/Driver/lit.local.cfg Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=337791&r1=337790&r2=337791&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Mon Jul 23 18:03:44 2018 @@ -2809,7 +2809,7 @@ public: C.MakeAction(HostAction); UnbundlingHostAction->registerDependentActionInfo( C.getSingleOffloadToolChain(), - /*BoundArch=*/"all", Action::OFK_Host); + /*BoundArch=*/StringRef(), Action::OFK_Host); HostAction = UnbundlingHostAction; } @@ -3868,7 +3868,7 @@ InputInfo Driver::BuildJobsForActionNoCa StringRef Arch; if (TargetDeviceOffloadKind == Action::OFK_HIP) { if (UI.DependentOffloadKind == Action::OFK_Host) - Arch = "all"; + Arch = StringRef(); else Arch = UI.DependentBoundArch; } else Modified: cfe/trunk/test/Driver/lit.local.cfg URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/lit.local.cfg?rev=337791&r1=337790&r2=337791&view=diff == --- cfe/trunk/test/Driver/lit.local.cfg (original) +++ cfe/trunk/test/Driver/lit.local.cfg Mon Jul 23 18:03:44 2018 @@ -1,5 +1,5 @@ config.suffixes = ['.c', '.cpp', '.h', '.m', '.mm', '.S', '.s', '.f90', '.f95', - '.cu', '.rs', '.cl'] + '.cu', '.rs', '.cl', '.hip'] config.substitutions = list(config.substitutions) config.substitutions.insert(0, ('%clang_cc1', ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r337793 - [HIP] pass -target-cpu when running the device-mode compiler
Author: yaxunl Date: Mon Jul 23 18:40:44 2018 New Revision: 337793 URL: http://llvm.org/viewvc/llvm-project?rev=337793&view=rev Log: [HIP] pass -target-cpu when running the device-mode compiler Differential Revision: https://reviews.llvm.org/D49643 Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp cfe/trunk/test/Driver/hip-toolchain.hip Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/HIP.cpp?rev=337793&r1=337792&r2=337793&view=diff == --- cfe/trunk/lib/Driver/ToolChains/HIP.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/HIP.cpp Mon Jul 23 18:40:44 2018 @@ -232,6 +232,8 @@ void HIPToolChain::addClangTargetOptions assert(DeviceOffloadingKind == Action::OFK_HIP && "Only HIP offloading kinds are supported for GPUs."); + CC1Args.push_back("-target-cpu"); + CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch)); CC1Args.push_back("-fcuda-is-device"); if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, Modified: cfe/trunk/test/Driver/hip-toolchain.hip URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-toolchain.hip?rev=337793&r1=337792&r2=337793&view=diff == --- cfe/trunk/test/Driver/hip-toolchain.hip (original) +++ cfe/trunk/test/Driver/hip-toolchain.hip Mon Jul 23 18:40:44 2018 @@ -14,14 +14,14 @@ // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64--linux-gnu" "-emit-llvm-bc" -// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-fcuda-is-device" -// CHECK-SAME: {{.*}} "-o" [[A_BC:".*bc"]] "-x" "hip" +// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803" +// CHECK-SAME: "-fcuda-is-device" {{.*}} "-o" [[A_BC:".*bc"]] "-x" "hip" // CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]] // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64--linux-gnu" "-emit-llvm-bc" -// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-fcuda-is-device" -// CHECK-SAME: {{.*}} 
"-o" [[B_BC:".*bc"]] "-x" "hip" +// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803" +// CHECK-SAME: "-fcuda-is-device" {{.*}} "-o" [[B_BC:".*bc"]] "-x" "hip" // CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]] // CHECK: [[LLVM_LINK:"*.llvm-link"]] [[A_BC]] [[B_BC]] @@ -40,14 +40,14 @@ // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64--linux-gnu" "-emit-llvm-bc" -// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-fcuda-is-device" -// CHECK-SAME: {{.*}} "-o" [[A_BC:".*bc"]] "-x" "hip" +// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900" +// CHECK-SAME: "-fcuda-is-device" {{.*}} "-o" [[A_BC:".*bc"]] "-x" "hip" // CHECK-SAME: {{.*}} [[A_SRC]] // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64--linux-gnu" "-emit-llvm-bc" -// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-fcuda-is-device" -// CHECK-SAME: {{.*}} "-o" [[B_BC:".*bc"]] "-x" "hip" +// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900" +// CHECK-SAME: "-fcuda-is-device" {{.*}} "-o" [[B_BC:".*bc"]] "-x" "hip" // CHECK-SAME: {{.*}} [[B_SRC]] // CHECK: [[LLVM_LINK]] [[A_BC]] [[B_BC]] ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r337797 - Attempt to fix regression due to r337791
Author: yaxunl Date: Mon Jul 23 19:12:24 2018 New Revision: 337797 URL: http://llvm.org/viewvc/llvm-project?rev=337797&view=rev Log: Attempt to fix regression due to r337791 Modified: cfe/trunk/test/Driver/hip-toolchain.hip Modified: cfe/trunk/test/Driver/hip-toolchain.hip URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-toolchain.hip?rev=337797&r1=337796&r2=337797&view=diff == --- cfe/trunk/test/Driver/hip-toolchain.hip (original) +++ cfe/trunk/test/Driver/hip-toolchain.hip Mon Jul 23 19:12:24 2018 @@ -80,5 +80,5 @@ // CHECK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" // CHECK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.*o]]" -// CHECK: [[LD:".*ld.lld"]] {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] +// CHECK: [[LD:".*ld.*"]] {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] // CHECK-SAME: {{.*}} "-T" "{{.*}}.lk" ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r338188 - [CUDA][HIP] Allow function-scope static const variable
Author: yaxunl Date: Fri Jul 27 20:05:25 2018 New Revision: 338188 URL: http://llvm.org/viewvc/llvm-project?rev=338188&view=rev Log: [CUDA][HIP] Allow function-scope static const variable CUDA 8.0 E.3.9.4 says: Within the body of a __device__ or __global__ function, only __shared__ variables or variables without any device memory qualifiers may be declared with static storage class. It is unclear how a function-scope non-const static variable without device memory qualifier is implemented, therefore only static const variable without device memory qualifier is allowed, which can be emitted as a global variable in constant address space. Currently clang only allows function-scope static variable with __shared__ qualifier. This patch also allows function-scope static const variable without device memory qualifier and emits it as a global variable in constant address space. Differential Revision: https://reviews.llvm.org/D49931 Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td cfe/trunk/lib/CodeGen/CodeGenModule.cpp cfe/trunk/lib/Sema/SemaDecl.cpp cfe/trunk/test/CodeGenCUDA/device-var-init.cu cfe/trunk/test/SemaCUDA/device-var-init.cu Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=338188&r1=338187&r2=338188&view=diff == --- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td (original) +++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td Fri Jul 27 20:05:25 2018 @@ -7129,7 +7129,8 @@ def err_shared_var_init : Error< "initialization is not supported for __shared__ variables.">; def err_device_static_local_var : Error< "within a %select{__device__|__global__|__host__|__host__ __device__}0 " -"function, only __shared__ variables may be marked 'static'">; +"function, only __shared__ variables or const variables without device " +"memory qualifier may be marked 'static'">; def err_cuda_vla : Error< "cannot use variable-length arrays in " 
"%select{__device__|__global__|__host__|__host__ __device__}0 functions">; Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=338188&r1=338187&r2=338188&view=diff == --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original) +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Fri Jul 27 20:05:25 2018 @@ -3176,6 +3176,10 @@ LangAS CodeGenModule::GetGlobalVarAddres return LangAS::cuda_constant; else if (D && D->hasAttr()) return LangAS::cuda_shared; +else if (D && D->hasAttr()) + return LangAS::cuda_device; +else if (D && D->getType().isConstQualified()) + return LangAS::cuda_constant; else return LangAS::cuda_device; } Modified: cfe/trunk/lib/Sema/SemaDecl.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDecl.cpp?rev=338188&r1=338187&r2=338188&view=diff == --- cfe/trunk/lib/Sema/SemaDecl.cpp (original) +++ cfe/trunk/lib/Sema/SemaDecl.cpp Fri Jul 27 20:05:25 2018 @@ -11914,14 +11914,25 @@ void Sema::FinalizeDeclaration(Decl *Thi NewAttr->setInherited(true); VD->addAttr(NewAttr); } - // CUDA E.2.9.4: Within the body of a __device__ or __global__ - // function, only __shared__ variables may be declared with - // static storage class. - if (getLangOpts().CUDA && !VD->hasAttr() && - CUDADiagIfDeviceCode(VD->getLocation(), - diag::err_device_static_local_var) - << CurrentCUDATarget()) -VD->setInvalidDecl(); + // CUDA 8.0 E.3.9.4: Within the body of a __device__ or __global__ + // function, only __shared__ variables or variables without any device + // memory qualifiers may be declared with static storage class. + // Note: It is unclear how a function-scope non-const static variable + // without device memory qualifier is implemented, therefore only static + // const variable without device memory qualifier is allowed. 
+ [&]() { +if (!getLangOpts().CUDA) + return; +if (VD->hasAttr()) + return; +if (VD->getType().isConstQualified() && +!(VD->hasAttr() || VD->hasAttr())) + return; +if (CUDADiagIfDeviceCode(VD->getLocation(), + diag::err_device_static_local_var) +<< CurrentCUDATarget()) + VD->setInvalidDecl(); + }(); } } Modified: cfe/trunk/test/CodeGenCUDA/device-var-init.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-var-init.cu?rev=338188&r1=338187&r2=338188&view=diff ==
r338805 - Sema: Fix explicit address space cast involving void pointers
Author: yaxunl Date: Thu Aug 2 20:18:56 2018 New Revision: 338805 URL: http://llvm.org/viewvc/llvm-project?rev=338805&view=rev Log: Sema: Fix explicit address space cast involving void pointers An explicit cast of a void pointer to a pointer type in a different address space is incorrectly classified as a bitcast, which causes an invalid bitcast in codegen. The patch fixes that by checking the address spaces of the source and destination types and setting the correct cast kind. Differential Revision: https://reviews.llvm.org/D50003 Modified: cfe/trunk/lib/Sema/SemaCast.cpp cfe/trunk/test/CodeGenCXX/address-space-cast.cpp Modified: cfe/trunk/lib/Sema/SemaCast.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaCast.cpp?rev=338805&r1=338804&r2=338805&view=diff == --- cfe/trunk/lib/Sema/SemaCast.cpp (original) +++ cfe/trunk/lib/Sema/SemaCast.cpp Thu Aug 2 20:18:56 2018 @@ -1044,6 +1044,17 @@ void CastOperation::CheckStaticCast() { } } +static bool IsAddressSpaceConversion(QualType SrcType, QualType DestType) { + auto *SrcPtrType = SrcType->getAs(); + if (!SrcPtrType) +return false; + auto *DestPtrType = DestType->getAs(); + if (!DestPtrType) +return false; + return SrcPtrType->getPointeeType().getAddressSpace() != + DestPtrType->getPointeeType().getAddressSpace(); +} + /// TryStaticCast - Check if a static cast can be performed, and do so if /// possible. If @p CStyle, ignore access restrictions on hierarchy casting /// and casting away constness. @@ -1185,7 +1196,9 @@ static TryCastResult TryStaticCast(Sema return TC_Failed; } } - Kind = CK_BitCast; + Kind = IsAddressSpaceConversion(SrcType, DestType) + ? 
CK_AddressSpaceConversion + : CK_BitCast; return TC_Success; } @@ -1964,12 +1977,6 @@ static bool fixOverloadedReinterpretCast return Result.isUsable(); } -static bool IsAddressSpaceConversion(QualType SrcType, QualType DestType) { - return SrcType->isPointerType() && DestType->isPointerType() && - SrcType->getAs()->getPointeeType().getAddressSpace() != - DestType->getAs()->getPointeeType().getAddressSpace(); -} - static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr, QualType DestType, bool CStyle, SourceRange OpRange, Modified: cfe/trunk/test/CodeGenCXX/address-space-cast.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/address-space-cast.cpp?rev=338805&r1=338804&r2=338805&view=diff == --- cfe/trunk/test/CodeGenCXX/address-space-cast.cpp (original) +++ cfe/trunk/test/CodeGenCXX/address-space-cast.cpp Thu Aug 2 20:18:56 2018 @@ -3,13 +3,63 @@ #define __private__ __attribute__((address_space(5))) void func_pchar(__private__ char *x); +void func_pvoid(__private__ void *x); +void func_pint(__private__ int *x); -void test_cast(char *gen_ptr) { +void test_cast(char *gen_char_ptr, void *gen_void_ptr, int *gen_int_ptr) { // CHECK: %[[cast:.*]] = addrspacecast i8* %{{.*}} to i8 addrspace(5)* // CHECK-NEXT: store i8 addrspace(5)* %[[cast]] - __private__ char *priv_ptr = (__private__ char *)gen_ptr; + __private__ char *priv_char_ptr = (__private__ char *)gen_char_ptr; // CHECK: %[[cast:.*]] = addrspacecast i8* %{{.*}} to i8 addrspace(5)* + // CHECK-NEXT: store i8 addrspace(5)* %[[cast]] + priv_char_ptr = (__private__ char *)gen_void_ptr; + + // CHECK: %[[cast:.*]] = addrspacecast i32* %{{.*}} to i8 addrspace(5)* + // CHECK-NEXT: store i8 addrspace(5)* %[[cast]] + priv_char_ptr = (__private__ char *)gen_int_ptr; + + // CHECK: %[[cast:.*]] = addrspacecast i8* %{{.*}} to i8 addrspace(5)* + // CHECK-NEXT: store i8 addrspace(5)* %[[cast]] + __private__ void *priv_void_ptr = (__private__ void *)gen_char_ptr; + + // CHECK: %[[cast:.*]] = 
addrspacecast i8* %{{.*}} to i8 addrspace(5)* + // CHECK-NEXT: store i8 addrspace(5)* %[[cast]] + priv_void_ptr = (__private__ void *)gen_void_ptr; + + // CHECK: %[[cast:.*]] = addrspacecast i32* %{{.*}} to i8 addrspace(5)* + // CHECK-NEXT: store i8 addrspace(5)* %[[cast]] + priv_void_ptr = (__private__ void *)gen_int_ptr; + + // CHECK: %[[cast:.*]] = addrspacecast i8* %{{.*}} to i32 addrspace(5)* + // CHECK-NEXT: store i32 addrspace(5)* %[[cast]] + __private__ int *priv_int_ptr = (__private__ int *)gen_void_ptr; + + // CHECK: %[[cast:.*]] = addrspacecast i8* %{{.*}} to i8 addrspace(5)* + // CHECK-NEXT: call void @_Z10func_pcharPU3AS5c(i8 addrspace(5)* %[[cast]]) + func_pchar((__private__ char *)gen_char_ptr); + + // CHECK: %[[cast:.*]] = addrspacecast i8* %{{.*}} to i8 addrspace(5)* + // CHECK-NEXT: call void @_Z10func_pcharPU3AS5c(i8 addrspace(5)* %[[cast]]) + func_pchar((__private__
r325031 - [AMDGPU] Change constant addr space to 4
Author: yaxunl Date: Tue Feb 13 10:01:21 2018 New Revision: 325031 URL: http://llvm.org/viewvc/llvm-project?rev=325031&view=rev Log: [AMDGPU] Change constant addr space to 4 Differential Revision: https://reviews.llvm.org/D43171 Added: cfe/trunk/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl Removed: cfe/trunk/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def cfe/trunk/lib/Basic/Targets/AMDGPU.cpp cfe/trunk/test/CodeGen/target-data.c cfe/trunk/test/CodeGenOpenCL/address-space-constant-initializers.cl cfe/trunk/test/CodeGenOpenCL/address-spaces.cl cfe/trunk/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl cfe/trunk/test/CodeGenOpenCL/amdgpu-nullptr.cl cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl cfe/trunk/test/CodeGenOpenCL/cast_image.cl cfe/trunk/test/CodeGenOpenCL/opencl_types.cl cfe/trunk/test/CodeGenOpenCL/private-array-initialization.cl cfe/trunk/test/CodeGenOpenCL/size_t.cl cfe/trunk/test/CodeGenOpenCL/vla.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=325031&r1=325030&r2=325031&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Tue Feb 13 10:01:21 2018 @@ -21,9 +21,9 @@ // SI+ only builtins. 
//===--===// -BUILTIN(__builtin_amdgcn_dispatch_ptr, "Uc*2", "nc") -BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "Uc*2", "nc") -BUILTIN(__builtin_amdgcn_implicitarg_ptr, "Uc*2", "nc") +BUILTIN(__builtin_amdgcn_dispatch_ptr, "Uc*4", "nc") +BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "Uc*4", "nc") +BUILTIN(__builtin_amdgcn_implicitarg_ptr, "Uc*4", "nc") BUILTIN(__builtin_amdgcn_workgroup_id_x, "Ui", "nc") BUILTIN(__builtin_amdgcn_workgroup_id_y, "Ui", "nc") Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=325031&r1=325030&r2=325031&view=diff == --- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original) +++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Tue Feb 13 10:01:21 2018 @@ -38,7 +38,7 @@ static const char *const DataLayoutStrin "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; static const char *const DataLayoutStringSIGenericIsZero = -"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-p6:32:32" +"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"; @@ -46,11 +46,11 @@ static const LangASMap AMDGPUPrivIsZeroD 4, // Default 1, // opencl_global 3, // opencl_local -2, // opencl_constant +4, // opencl_constant 0, // opencl_private 4, // opencl_generic 1, // cuda_device -2, // cuda_constant +4, // cuda_constant 3 // cuda_shared }; @@ -58,11 +58,11 @@ static const LangASMap AMDGPUGenIsZeroDe 0, // Default 1, // opencl_global 3, // opencl_local -2, // opencl_constant +4, // opencl_constant 5, // opencl_private 0, // opencl_generic 1, // cuda_device -2, // cuda_constant +4, // cuda_constant 3 // cuda_shared }; @@ -70,11 +70,11 @@ static const LangASMap AMDGPUPrivIsZeroD 0, // Default 1, // opencl_global 3, // opencl_local -2, // opencl_constant +4, // opencl_constant 0, // opencl_private 4, // opencl_generic 1, // cuda_device -2, // cuda_constant +4, // 
cuda_constant 3 // cuda_shared }; @@ -82,11 +82,11 @@ static const LangASMap AMDGPUGenIsZeroDe 5, // Default 1, // opencl_global 3, // opencl_local -2, // opencl_constant +4, // opencl_constant 5, // opencl_private 0, // opencl_generic 1, // cuda_device -2, // cuda_constant +4, // cuda_constant 3 // cuda_shared }; } // namespace targets Modified: cfe/trunk/test/CodeGen/target-data.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/target-data.c?rev=325031&r1=325030&r2=325031&view=diff == --- cfe/trunk/test/CodeGen/target-data.c (original) +++ cfe/trunk/test/CodeGen/target-data.c Tue Feb 13 10:01:21 2018 @@ -132,12 +132,12 @@ // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SI -// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" +// R600SI: target datalayout = "e-p:64:64-p1:6
r325264 - [OpenCL] Fix __enqueue_block for block with captures
Author: yaxunl Date: Thu Feb 15 08:39:19 2018 New Revision: 325264 URL: http://llvm.org/viewvc/llvm-project?rev=325264&view=rev Log: [OpenCL] Fix __enqueue_block for block with captures The following test case causes issue with codegen of __enqueue_block void (^block)(void) = ^{ callee(id, out); }; enqueue_kernel(queue, 0, ndrange, block); Clang first does codegen for block expression in the first line and deletes its block info. Clang then tries to do codegen for the same block expression again for the second line, and fails because the block info is gone. The fix is to do normal codegen for both lines. Introduce an API to OpenCL runtime to record llvm block invoke function and llvm block literal emitted for each AST block expression, and use the recorded information for generating the wrapper kernel. The EmitBlockLiteral APIs are cleaned up to minimize changes to the normal codegen of blocks. Another minor issue is that some clean up AST expression is generated for block with captures, which can be stripped by IgnoreImplicit. Differential Revision: https://reviews.llvm.org/D43240 Modified: cfe/trunk/lib/CodeGen/CGBlocks.cpp cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h cfe/trunk/lib/CodeGen/CodeGenFunction.h cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl Modified: cfe/trunk/lib/CodeGen/CGBlocks.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBlocks.cpp?rev=325264&r1=325263&r2=325264&view=diff == --- cfe/trunk/lib/CodeGen/CGBlocks.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBlocks.cpp Thu Feb 15 08:39:19 2018 @@ -740,27 +740,19 @@ void CodeGenFunction::destroyBlockInfos( } /// Emit a block literal expression in the current function. 
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr, - llvm::Function **InvokeF) { +llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) { // If the block has no captures, we won't have a pre-computed // layout for it. if (!blockExpr->getBlockDecl()->hasCaptures()) { // The block literal is emitted as a global variable, and the block invoke // function has to be extracted from its initializer. if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) { - if (InvokeF) { -auto *GV = cast( -cast(Block)->stripPointerCasts()); -auto *BlockInit = cast(GV->getInitializer()); -*InvokeF = cast( -BlockInit->getAggregateElement(2)->stripPointerCasts()); - } return Block; } CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName()); computeBlockInfo(CGM, this, blockInfo); blockInfo.BlockExpression = blockExpr; -return EmitBlockLiteral(blockInfo, InvokeF); +return EmitBlockLiteral(blockInfo); } // Find the block info for this block and take ownership of it. @@ -769,11 +761,10 @@ llvm::Value *CodeGenFunction::EmitBlockL blockExpr->getBlockDecl())); blockInfo->BlockExpression = blockExpr; - return EmitBlockLiteral(*blockInfo, InvokeF); + return EmitBlockLiteral(*blockInfo); } -llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo, - llvm::Function **InvokeF) { +llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL; auto GenVoidPtrTy = IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy; @@ -788,8 +779,6 @@ llvm::Value *CodeGenFunction::EmitBlockL BlockCGF.SanOpts = SanOpts; auto *InvokeFn = BlockCGF.GenerateBlockFunction( CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal); - if (InvokeF) -*InvokeF = InvokeFn; auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy); // If there is nothing to capture, we can emit this as a global block. 
@@ -1024,6 +1013,11 @@ llvm::Value *CodeGenFunction::EmitBlockL llvm::Value *result = Builder.CreatePointerCast( blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType())); + if (IsOpenCL) { +CGM.getOpenCLRuntime().recordBlockInfo(blockInfo.BlockExpression, InvokeFn, + result); + } + return result; } @@ -1287,6 +1281,10 @@ static llvm::Constant *buildGlobalBlock( llvm::Constant *Result = llvm::ConstantExpr::getPointerCast(literal, RequiredType); CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result); + if (CGM.getContext().getLangOpts().OpenCL) +CGM.getOpenCLRuntime().recordBlockInfo( +blockInfo.BlockExpression, +cast(blockFn->stripPointerCasts()), Result); return Result; }
r325279 - Clean up AMDGCN tests
Author: yaxunl Date: Thu Feb 15 11:12:41 2018 New Revision: 325279 URL: http://llvm.org/viewvc/llvm-project?rev=325279&view=rev Log: Clean up AMDGCN tests Differential Revision: https://reviews.llvm.org/D43340 Modified: cfe/trunk/test/CodeGen/address-space.c cfe/trunk/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp cfe/trunk/test/CodeGenCXX/vla.cpp cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl cfe/trunk/test/CodeGenOpenCL/address-space-constant-initializers.cl cfe/trunk/test/CodeGenOpenCL/address-spaces.cl cfe/trunk/test/CodeGenOpenCL/blocks.cl cfe/trunk/test/CodeGenOpenCL/lifetime.cl cfe/trunk/test/CodeGenOpenCL/vla.cl cfe/trunk/test/Index/pipe-size.cl Modified: cfe/trunk/test/CodeGen/address-space.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/address-space.c?rev=325279&r1=325278&r2=325279&view=diff == --- cfe/trunk/test/CodeGen/address-space.c (original) +++ cfe/trunk/test/CodeGen/address-space.c Thu Feb 15 11:12:41 2018 @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm < %s | FileCheck -check-prefixes=CHECK,X86,GIZ %s -// RUN: %clang_cc1 -triple amdgcn---amdgiz -emit-llvm < %s | FileCheck -check-prefixes=CHECK,AMDGIZ,GIZ %s +// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm < %s | FileCheck -check-prefixes=CHECK,X86 %s +// RUN: %clang_cc1 -triple amdgcn -emit-llvm < %s | FileCheck -check-prefixes=CHECK,AMDGCN %s // CHECK: @foo = common addrspace(1) global int foo __attribute__((address_space(1))); @@ -24,10 +24,10 @@ __attribute__((address_space(2))) int *A // CHECK-LABEL: define void @test3() // X86: load i32 addrspace(2)*, i32 addrspace(2)** @B -// AMDGIZ: load i32 addrspace(2)*, i32 addrspace(2)** addrspacecast (i32 addrspace(2)* addrspace(1)* @B to i32 addrspace(2)**) +// AMDGCN: load i32 addrspace(2)*, i32 addrspace(2)** addrspacecast (i32 addrspace(2)* addrspace(1)* @B to i32 addrspace(2)**) // CHECK: load i32, i32 addrspace(2)* // X86: load i32 addrspace(2)*, i32 addrspace(2)** @A -// 
AMDGIZ: load i32 addrspace(2)*, i32 addrspace(2)** addrspacecast (i32 addrspace(2)* addrspace(1)* @A to i32 addrspace(2)**) +// AMDGCN: load i32 addrspace(2)*, i32 addrspace(2)** addrspacecast (i32 addrspace(2)* addrspace(1)* @A to i32 addrspace(2)**) // CHECK: store i32 {{.*}}, i32 addrspace(2)* void test3() { *A = *B; @@ -39,8 +39,8 @@ typedef struct { } MyStruct; // CHECK-LABEL: define void @test4( -// GIZ: call void @llvm.memcpy.p0i8.p2i8 -// GIZ: call void @llvm.memcpy.p2i8.p0i8 +// CHECK: call void @llvm.memcpy.p0i8.p2i8 +// CHECK: call void @llvm.memcpy.p2i8.p0i8 void test4(MyStruct __attribute__((address_space(2))) *pPtr) { MyStruct s = pPtr[0]; pPtr[0] = s; Modified: cfe/trunk/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp?rev=325279&r1=325278&r2=325279&view=diff == --- cfe/trunk/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp (original) +++ cfe/trunk/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp Thu Feb 15 11:12:41 2018 @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -std=c++11 -triple x86_64-none-linux-gnu -emit-llvm -o - %s | FileCheck -check-prefixes=X86,CHECK %s -// RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa-amdgiz -DNO_TLS -emit-llvm -o - %s | FileCheck -check-prefixes=AMD,CHECK %s +// RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa -DNO_TLS -emit-llvm -o - %s | FileCheck -check-prefixes=AMDGCN,CHECK %s namespace std { typedef decltype(sizeof(int)) size_t; @@ -49,8 +49,8 @@ struct wantslist1 { }; // X86: @_ZGR15globalInitList1_ = internal constant [3 x i32] [i32 1, i32 2, i32 3] // X86: @globalInitList1 = global %{{[^ ]+}} { i32* getelementptr inbounds ([3 x i32], [3 x i32]* @_ZGR15globalInitList1_, i32 0, i32 0), i{{32|64}} 3 } -// AMD: @_ZGR15globalInitList1_ = internal addrspace(1) constant [3 x i32] [i32 1, i32 2, i32 3] -// AMD: @globalInitList1 = addrspace(1) global %{{[^ ]+}} { i32* addrspacecast (i32 
addrspace(1)* getelementptr inbounds ([3 x i32], [3 x i32] addrspace(1)* @_ZGR15globalInitList1_, i32 0, i32 0) to i32*), i{{32|64}} 3 } +// AMDGCN: @_ZGR15globalInitList1_ = internal addrspace(1) constant [3 x i32] [i32 1, i32 2, i32 3] +// AMDGCN: @globalInitList1 = addrspace(1) global %{{[^ ]+}} { i32* addrspacecast (i32 addrspace(1)* getelementptr inbounds ([3 x i32], [3 x i32] addrspace(1)* @_ZGR15globalInitList1_, i32 0, i32 0) to i32*), i{{32|64}} 3 } std::initializer_list globalInitList1 = {1, 2, 3}; #ifndef NO_TLS @@ -67,8 +67,8 @@ std::initializer_list thread_local // X86: @globalInitList2 = global %{{[^ ]+}} zeroinitializer // X86: @_ZGR15globalInitList2_ = internal global [2 x %[[WITHARG:[^ ]*]]] zeroinitializer -// AMD
r305711 - CodeGen: Cast temporary variable to proper address space
Author: yaxunl Date: Mon Jun 19 12:03:41 2017 New Revision: 305711 URL: http://llvm.org/viewvc/llvm-project?rev=305711&view=rev Log: CodeGen: Cast temporary variable to proper address space In C++ all variables are in default address space. Previously, a change was made to cast automatic variables to the default address space. However, that is not sufficient since all temporary variables need to be cast to the default address space. This patch casts all temporary variables to default address space except those for passing indirect arguments since they are only used for load/store. This patch only affects targets having a non-zero alloca address space. Differential Revision: https://reviews.llvm.org/D33706 Modified: cfe/trunk/lib/CodeGen/CGCall.cpp cfe/trunk/lib/CodeGen/CGDecl.cpp cfe/trunk/lib/CodeGen/CGExpr.cpp cfe/trunk/lib/CodeGen/CodeGenFunction.h cfe/trunk/test/CodeGen/address-space.c cfe/trunk/test/CodeGen/default-address-space.c cfe/trunk/test/CodeGen/x86_64-arguments.c cfe/trunk/test/CodeGenCXX/amdgcn-automatic-variable.cpp Modified: cfe/trunk/lib/CodeGen/CGCall.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=305711&r1=305710&r2=305711&view=diff == --- cfe/trunk/lib/CodeGen/CGCall.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCall.cpp Mon Jun 19 12:03:41 2017 @@ -3813,7 +3813,8 @@ RValue CodeGenFunction::EmitCall(const C assert(NumIRArgs == 1); if (RV.isScalar() || RV.isComplex()) { // Make a temporary alloca to pass the argument. -Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign()); +Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), + "indirect-arg-temp", false); IRCallArgs[FirstIRArg] = Addr.getPointer(); LValue argLV = MakeAddrLValue(Addr, I->Ty); @@ -3842,7 +3843,8 @@ RValue CodeGenFunction::EmitCall(const C < Align.getQuantity()) || (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) { // Create an aligned temporary, and copy to it. 
- Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign()); + Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), + "byval-temp", false); IRCallArgs[FirstIRArg] = AI.getPointer(); EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified()); } else { Modified: cfe/trunk/lib/CodeGen/CGDecl.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGDecl.cpp?rev=305711&r1=305710&r2=305711&view=diff == --- cfe/trunk/lib/CodeGen/CGDecl.cpp (original) +++ cfe/trunk/lib/CodeGen/CGDecl.cpp Mon Jun 19 12:03:41 2017 @@ -954,6 +954,7 @@ void CodeGenFunction::EmitLifetimeEnd(ll CodeGenFunction::AutoVarEmission CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { QualType Ty = D.getType(); + assert(Ty.getAddressSpace() == LangAS::Default); AutoVarEmission emission(D); @@ -1046,8 +1047,7 @@ CodeGenFunction::EmitAutoVarAlloca(const // Create the alloca. Note that we set the name separately from // building the instruction so that it's there even in no-asserts // builds. - address = CreateTempAlloca(allocaTy, allocaAlignment); - address.getPointer()->setName(D.getName()); + address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName()); // Don't emit lifetime markers for MSVC catch parameters. The lifetime of // the catch parameter starts in the catchpad instruction, and we can't @@ -1107,27 +1107,9 @@ CodeGenFunction::EmitAutoVarAlloca(const llvm::Type *llvmTy = ConvertTypeForMem(elementType); // Allocate memory for the array. -llvm::AllocaInst *vla = Builder.CreateAlloca(llvmTy, elementCount, "vla"); -vla->setAlignment(alignment.getQuantity()); - -address = Address(vla, alignment); +address = CreateTempAlloca(llvmTy, alignment, "vla", elementCount); } - // Alloca always returns a pointer in alloca address space, which may - // be different from the type defined by the language. For example, - // in C++ the auto variables are in the default address space. Therefore - // cast alloca to the expected address space when necessary. 
- auto T = D.getType(); - assert(T.getAddressSpace() == LangAS::Default); - if (getASTAllocaAddressSpace() != LangAS::Default) { -auto *Addr = getTargetHooks().performAddrSpaceCast( -*this, address.getPointer(), getASTAllocaAddressSpace(), -T.getAddressSpace(), -address.getElementType()->getPointerTo( -getContext().getTargetAddressSpace(T.getAddressSpace())), -/*non-null*/ true); -address = Address(Addr, address.getAlignment()); - } setAddrOfLocalVar(&D, address); emission.Addr = address; Modified
r326725 - [AMDGPU] Clean up old address space mapping and fix constant address space value
Author: yaxunl Date: Mon Mar 5 09:50:10 2018 New Revision: 326725 URL: http://llvm.org/viewvc/llvm-project?rev=326725&view=rev Log: [AMDGPU] Clean up old address space mapping and fix constant address space value Differential Revision: https://reviews.llvm.org/D43911 Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp cfe/trunk/lib/Basic/Targets/AMDGPU.h cfe/trunk/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=326725&r1=326724&r2=326725&view=diff == --- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original) +++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Mon Mar 5 09:50:10 2018 @@ -32,62 +32,33 @@ static const char *const DataLayoutStrin "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"; -static const char *const DataLayoutStringSIPrivateIsZero = -"e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p6:32:32" -"-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" -"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; - -static const char *const DataLayoutStringSIGenericIsZero = +static const char *const DataLayoutStringAMDGCN = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"; -static const LangASMap AMDGPUPrivIsZeroDefIsGenMap = { -4, // Default -1, // opencl_global -3, // opencl_local -4, // opencl_constant -0, // opencl_private -4, // opencl_generic -1, // cuda_device -4, // cuda_constant -3 // cuda_shared -}; - -static const LangASMap AMDGPUGenIsZeroDefIsGenMap = { -0, // Default -1, // opencl_global -3, // opencl_local -4, // opencl_constant -5, // opencl_private -0, // opencl_generic -1, // cuda_device -4, // cuda_constant -3 // cuda_shared +const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { +Generic, // Default +Global, // opencl_global 
+Local,// opencl_local +Constant, // opencl_constant +Private, // opencl_private +Generic, // opencl_generic +Global, // cuda_device +Constant, // cuda_constant +Local // cuda_shared }; -static const LangASMap AMDGPUPrivIsZeroDefIsPrivMap = { -0, // Default -1, // opencl_global -3, // opencl_local -4, // opencl_constant -0, // opencl_private -4, // opencl_generic -1, // cuda_device -4, // cuda_constant -3 // cuda_shared -}; - -static const LangASMap AMDGPUGenIsZeroDefIsPrivMap = { -5, // Default -1, // opencl_global -3, // opencl_local -4, // opencl_constant -5, // opencl_private -0, // opencl_generic -1, // cuda_device -4, // cuda_constant -3 // cuda_shared +const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { +Private, // Default +Global, // opencl_global +Local,// opencl_local +Constant, // opencl_constant +Private, // opencl_private +Generic, // opencl_generic +Global, // cuda_device +Constant, // cuda_constant +Local // cuda_shared }; } // namespace targets } // namespace clang @@ -282,29 +253,18 @@ void AMDGPUTargetInfo::fillValidCPUList( } void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { - if (isGenericZero(getTriple())) { -AddrSpaceMap = DefaultIsPrivate ? &AMDGPUGenIsZeroDefIsPrivMap -: &AMDGPUGenIsZeroDefIsGenMap; - } else { -AddrSpaceMap = DefaultIsPrivate ? &AMDGPUPrivIsZeroDefIsPrivMap -: &AMDGPUPrivIsZeroDefIsGenMap; - } + AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; } AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) - : TargetInfo(Triple), AS(isGenericZero(Triple)), -GPU(isAMDGCN(Triple) ? AMDGCNGPUs[0] : parseR600Name(Opts.CPU)) { - auto IsGenericZero = isGenericZero(Triple); - resetDataLayout(isAMDGCN(getTriple()) - ? (IsGenericZero ? DataLayoutStringSIGenericIsZero - : DataLayoutStringSIPrivateIsZero) - : DataLayoutStringR600); - assert(DataLayout->getAllocaAddrSpace() == AS.Private); +: TargetInfo(Triple), + GPU(isAMDGCN(Triple) ? 
AMDGCNGPUs[0] : parseR600Name(Opts.CPU)) { + resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN +: DataLayoutStringR600); + assert(DataLayout->getAllocaAddrSpace() == Private); setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || - Triple.getEnvironment() == llvm::Triple::OpenCL || - Triple.getEnvi
r326937 - [OpenCL] Remove block invoke function from emitted block literal struct
Author: yaxunl Date: Wed Mar 7 11:32:58 2018 New Revision: 326937 URL: http://llvm.org/viewvc/llvm-project?rev=326937&view=rev Log: [OpenCL] Remove block invoke function from emitted block literal struct OpenCL runtime tracks the invoke function emitted for any block expression. Due to restrictions on blocks in OpenCL (v2.0 s6.12.5), it is always possible to know the block invoke function when emitting call of block expression or __enqueue_kernel builtin functions. Since __enqueue_kernel already has an argument for the invoke function, it is redundant to have invoke function member in the llvm block literal structure. This patch removes invoke function from the llvm block literal structure. It also removes the bitcast of block invoke function to the generic block literal type which is useless for OpenCL. This will save some space for the kernel argument, and also eliminate some store instructions. Differential Revision: https://reviews.llvm.org/D43783 Modified: cfe/trunk/lib/CodeGen/CGBlocks.cpp cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl cfe/trunk/test/CodeGenOpenCL/blocks.cl cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl Modified: cfe/trunk/lib/CodeGen/CGBlocks.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBlocks.cpp?rev=326937&r1=326936&r2=326937&view=diff == --- cfe/trunk/lib/CodeGen/CGBlocks.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBlocks.cpp Wed Mar 7 11:32:58 2018 @@ -307,25 +307,12 @@ static void initializeForBlockHeader(Cod assert(elementTypes.empty()); if (CGM.getLangOpts().OpenCL) { -// The header is basically 'struct { int; int; generic void *; +// The header is basically 'struct { int; int; // custom_fields; }'. Assert that struct is packed. 
-auto GenericAS = -CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic); -auto GenPtrAlign = -CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8); -auto GenPtrSize = -CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8); -assert(CGM.getIntSize() <= GenPtrSize); -assert(CGM.getIntAlign() <= GenPtrAlign); -assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign)); elementTypes.push_back(CGM.IntTy); /* total size */ elementTypes.push_back(CGM.IntTy); /* align */ -elementTypes.push_back( -CGM.getOpenCLRuntime() -.getGenericVoidPointerType()); /* invoke function */ -unsigned Offset = -2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity(); -unsigned BlockAlign = GenPtrAlign.getQuantity(); +unsigned Offset = 2 * CGM.getIntSize().getQuantity(); +unsigned BlockAlign = CGM.getIntAlign().getQuantity(); if (auto *Helper = CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ { @@ -771,20 +758,12 @@ llvm::Value *CodeGenFunction::EmitBlockL llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL; - auto GenVoidPtrTy = - IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy; - LangAS GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default; - auto GenVoidPtrSize = CharUnits::fromQuantity( - CGM.getTarget().getPointerWidth( - CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) / - 8); // Using the computed layout, generate the actual block function. bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda(); CodeGenFunction BlockCGF{CGM, true}; BlockCGF.SanOpts = SanOpts; auto *InvokeFn = BlockCGF.GenerateBlockFunction( CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal); - auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy); // If there is nothing to capture, we can emit this as a global block. 
if (blockInfo.CanBeGlobal) @@ -853,11 +832,12 @@ llvm::Value *CodeGenFunction::EmitBlockL llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()), getIntSize(), "block.align"); } -addHeaderField(blockFn, GenVoidPtrSize, "block.invoke"); -if (!IsOpenCL) +if (!IsOpenCL) { + addHeaderField(llvm::ConstantExpr::getBitCast(InvokeFn, VoidPtrTy), + getPointerSize(), "block.invoke"); addHeaderField(descriptor, getPointerSize(), "block.descriptor"); -else if (auto *Helper = - CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { +} else if (auto *Helper = + CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) { addHeaderField( I.first, @@
r326946 - CodeGen: Fix address space of indirect function argument
Author: yaxunl Date: Wed Mar 7 13:45:40 2018 New Revision: 326946 URL: http://llvm.org/viewvc/llvm-project?rev=326946&view=rev Log: CodeGen: Fix address space of indirect function argument The indirect function argument is in alloca address space in LLVM IR. However, during Clang codegen for C++, the address space of indirect function argument should match its address space in the source code, i.e., default addr space, even for indirect argument. This is because destructor of the indirect argument may be called in the caller function, and address of the indirect argument may be taken; in either case the indirect function argument is expected to be in default addr space, not the alloca address space. Therefore, the indirect function argument should be mapped to the temp var cast to default address space. The caller will cast it to alloca addr space when passing it to the callee. In the callee, the argument is also cast to the default address space and used. CallArg is refactored to facilitate this fix. 
Differential Revision: https://reviews.llvm.org/D34367 Added: cfe/trunk/test/CodeGenCXX/amdgcn-func-arg.cpp Modified: cfe/trunk/lib/CodeGen/CGAtomic.cpp cfe/trunk/lib/CodeGen/CGCall.cpp cfe/trunk/lib/CodeGen/CGCall.h cfe/trunk/lib/CodeGen/CGClass.cpp cfe/trunk/lib/CodeGen/CGDecl.cpp cfe/trunk/lib/CodeGen/CGExprCXX.cpp cfe/trunk/lib/CodeGen/CGGPUBuiltin.cpp cfe/trunk/lib/CodeGen/CGObjCGNU.cpp cfe/trunk/lib/CodeGen/CGObjCMac.cpp cfe/trunk/lib/CodeGen/CodeGenFunction.h cfe/trunk/lib/CodeGen/ItaniumCXXABI.cpp cfe/trunk/lib/CodeGen/MicrosoftCXXABI.cpp cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl cfe/trunk/test/CodeGenOpenCL/byval.cl Modified: cfe/trunk/lib/CodeGen/CGAtomic.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGAtomic.cpp?rev=326946&r1=326945&r2=326946&view=diff == --- cfe/trunk/lib/CodeGen/CGAtomic.cpp (original) +++ cfe/trunk/lib/CodeGen/CGAtomic.cpp Wed Mar 7 13:45:40 2018 @@ -1160,7 +1160,7 @@ RValue CodeGenFunction::EmitAtomicExpr(A if (UseOptimizedLibcall && Res.getScalarVal()) { llvm::Value *ResVal = Res.getScalarVal(); if (PostOp) { -llvm::Value *LoadVal1 = Args[1].RV.getScalarVal(); +llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal(); ResVal = Builder.CreateBinOp(PostOp, ResVal, LoadVal1); } if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch) Modified: cfe/trunk/lib/CodeGen/CGCall.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=326946&r1=326945&r2=326946&view=diff == --- cfe/trunk/lib/CodeGen/CGCall.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCall.cpp Wed Mar 7 13:45:40 2018 @@ -1040,42 +1040,49 @@ void CodeGenFunction::ExpandTypeFromArgs } void CodeGenFunction::ExpandTypeToArgs( -QualType Ty, RValue RV, llvm::FunctionType *IRFuncTy, +QualType Ty, CallArg Arg, llvm::FunctionType *IRFuncTy, SmallVectorImpl &IRCallArgs, unsigned &IRCallArgPos) { auto Exp = getTypeExpansion(Ty, getContext()); if (auto CAExp = dyn_cast(Exp.get())) { -forConstantArrayExpansion(*this, CAExp, 
RV.getAggregateAddress(), - [&](Address EltAddr) { - RValue EltRV = - convertTempToRValue(EltAddr, CAExp->EltTy, SourceLocation()); - ExpandTypeToArgs(CAExp->EltTy, EltRV, IRFuncTy, IRCallArgs, IRCallArgPos); -}); +Address Addr = Arg.hasLValue() ? Arg.getKnownLValue().getAddress() + : Arg.getKnownRValue().getAggregateAddress(); +forConstantArrayExpansion( +*this, CAExp, Addr, [&](Address EltAddr) { + CallArg EltArg = CallArg( + convertTempToRValue(EltAddr, CAExp->EltTy, SourceLocation()), + CAExp->EltTy); + ExpandTypeToArgs(CAExp->EltTy, EltArg, IRFuncTy, IRCallArgs, + IRCallArgPos); +}); } else if (auto RExp = dyn_cast(Exp.get())) { -Address This = RV.getAggregateAddress(); +Address This = Arg.hasLValue() ? Arg.getKnownLValue().getAddress() + : Arg.getKnownRValue().getAggregateAddress(); for (const CXXBaseSpecifier *BS : RExp->Bases) { // Perform a single step derived-to-base conversion. Address Base = GetAddressOfBaseClass(This, Ty->getAsCXXRecordDecl(), &BS, &BS + 1, /*NullCheckValue=*/false, SourceLocation()); - RValue BaseRV = RValue::getAggregate(Base); + CallArg BaseArg = CallArg(RValue::getAggregate(Base), BS->getType()); // Recurse onto bases. - ExpandTypeToArgs(BS->getType(), BaseRV, IRFuncTy, IRCallArgs, + ExpandTypeToArgs(BS->getType(), BaseArg, IRFuncTy, IRCallArgs, IRCallArgPos); } L
r327515 - CodeGen: Reduce LValue and CallArgList memory footprint before recommitting r326946
Author: yaxunl Date: Wed Mar 14 08:02:28 2018 New Revision: 327515 URL: http://llvm.org/viewvc/llvm-project?rev=327515&view=rev Log: CodeGen: Reduce LValue and CallArgList memory footprint before recommitting r326946 Recent change r326946 (https://reviews.llvm.org/D34367) causes regression in Eigen due to increased memory footprint of CallArg. This patch reduces LValue size from 112 to 96 bytes and reduces inline argument count of CallArgList from 16 to 8. It has been verified that this will let the added deep AST tree test pass with r326946. In the long run, CallArg or LValue memory footprint should be further optimized. Differential Revision: https://reviews.llvm.org/D5 Modified: cfe/trunk/lib/CodeGen/CGCall.h cfe/trunk/lib/CodeGen/CGValue.h Modified: cfe/trunk/lib/CodeGen/CGCall.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.h?rev=327515&r1=327514&r2=327515&view=diff == --- cfe/trunk/lib/CodeGen/CGCall.h (original) +++ cfe/trunk/lib/CodeGen/CGCall.h Wed Mar 14 08:02:28 2018 @@ -224,7 +224,7 @@ public: /// CallArgList - Type for representing both the value and type of /// arguments in a call. class CallArgList : -public SmallVector { +public SmallVector { public: CallArgList() : StackBase(nullptr) {} Modified: cfe/trunk/lib/CodeGen/CGValue.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGValue.h?rev=327515&r1=327514&r2=327515&view=diff == --- cfe/trunk/lib/CodeGen/CGValue.h (original) +++ cfe/trunk/lib/CodeGen/CGValue.h Wed Mar 14 08:02:28 2018 @@ -193,7 +193,7 @@ class LValue { // The alignment to use when accessing this lvalue. (For vector elements, // this is the alignment of the whole vector.) - int64_t Alignment; + unsigned Alignment; // objective-c's ivar bool Ivar:1; @@ -215,13 +215,13 @@ class LValue { // to make the default bitfield pattern all-zeroes. 
bool ImpreciseLifetime : 1; - LValueBaseInfo BaseInfo; - TBAAAccessInfo TBAAInfo; - // This flag shows if a nontemporal load/stores should be used when accessing // this lvalue. bool Nontemporal : 1; + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; + Expr *BaseIvarExp; private: @@ -231,7 +231,10 @@ private: "initializing l-value with zero alignment!"); this->Type = Type; this->Quals = Quals; -this->Alignment = Alignment.getQuantity(); +const unsigned MaxAlign = 1U << 31; +this->Alignment = Alignment.getQuantity() <= MaxAlign + ? Alignment.getQuantity() + : MaxAlign; assert(this->Alignment == Alignment.getQuantity() && "Alignment exceeds allowed max!"); this->BaseInfo = BaseInfo; ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r327516 - Add deep AST tree test for r327515
Author: yaxunl Date: Wed Mar 14 08:03:31 2018 New Revision: 327516 URL: http://llvm.org/viewvc/llvm-project?rev=327516&view=rev Log: Add deep AST tree test for r327515 Added: cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp Added: cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp?rev=327516&view=auto == --- cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp (added) +++ cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp Wed Mar 14 08:03:31 2018 @@ -0,0 +1,262 @@ +// RUN: %clang_cc1 %s +// This test will cause clang to generate a deep AST tree with many CallArgs. +// This is to make sure there is no stack overflow for such situations. +// It is based on a use case in Eigen: +// https://eigen.tuxfamily.org/dox/group__TutorialAdvancedInitialization.html +// +struct VectorBuilder { + VectorBuilder &operator,(int); +}; +void f() { + VectorBuilder(), + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, +
r327529 - Attempt to fix failure of deep-ast-tree.cpp on ppc64 and atom
Author: yaxunl Date: Wed Mar 14 09:47:49 2018 New Revision: 327529 URL: http://llvm.org/viewvc/llvm-project?rev=327529&view=rev Log: Attempt to fix failure of deep-ast-tree.cpp on ppc64 and atom Modified: cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp Modified: cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp?rev=327529&r1=327528&r2=327529&view=diff == --- cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp (original) +++ cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp Wed Mar 14 09:47:49 2018 @@ -130,6 +130,8 @@ void f() { 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, + /* some archs have smaller stack size */ +#if !defined(__ppc__) && !defined(__arm__) 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, @@ -257,6 +259,7 @@ void f() { 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, +#endif 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0; } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r327547 - Attempt to fix failure of deep-ast-tree.cpp on atom and s390
Author: yaxunl Date: Wed Mar 14 11:24:38 2018 New Revision: 327547 URL: http://llvm.org/viewvc/llvm-project?rev=327547&view=rev Log: Attempt to fix failure of deep-ast-tree.cpp on atom and s390 Modified: cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp Modified: cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp?rev=327547&r1=327546&r2=327547&view=diff == --- cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp (original) +++ cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp Wed Mar 14 11:24:38 2018 @@ -131,7 +131,7 @@ void f() { 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, /* some archs have smaller stack size */ -#if !defined(__ppc__) && !defined(__arm__) +#if !defined(__ppc__) && !defined(__atom__) && !defined(__s390__) 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r327570 - Reduce AST depth for test deep-ast-tree.cpp for atom
Author: yaxunl Date: Wed Mar 14 13:41:05 2018 New Revision: 327570 URL: http://llvm.org/viewvc/llvm-project?rev=327570&view=rev Log: Reduce AST depth for test deep-ast-tree.cpp for atom Modified: cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp Modified: cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp?rev=327570&r1=327569&r2=327570&view=diff == --- cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp (original) +++ cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp Wed Mar 14 13:41:05 2018 @@ -20,6 +20,8 @@ void f() { 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, +/* some archs have smaller stack size */ +#if !defined(__atom__) 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, @@ -130,8 +132,7 @@ void f() { 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - /* some archs have smaller stack size */ -#if !defined(__ppc__) && !defined(__atom__) && !defined(__s390__) +#if !defined(__ppc__) && !defined(__s390__) 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, @@ -260,6 +261,7 @@ void f() { 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, 
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, #endif +#endif 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0; } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r327578 - Remove test deep-ast-tree.cpp
Author: yaxunl Date: Wed Mar 14 14:40:55 2018 New Revision: 327578 URL: http://llvm.org/viewvc/llvm-project?rev=327578&view=rev Log: Remove test deep-ast-tree.cpp Since there is no reliable way to change the AST depth of this test by supported stack size of the test environment, remove this test for now. Removed: cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp Removed: cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp?rev=327577&view=auto == --- cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp (original) +++ cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp (removed) @@ -1,267 +0,0 @@ -// RUN: %clang_cc1 %s -// This test will cause clang to generate a deep AST tree with many CallArgs. -// This is to make sure there is no stack overflow for such situations. -// It is based on a use case in Eigen: -// https://eigen.tuxfamily.org/dox/group__TutorialAdvancedInitialization.html -// -struct VectorBuilder { - VectorBuilder &operator,(int); -}; -void f() { - VectorBuilder(), - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, -/* some archs have smaller stack size 
*/ -#if !defined(__atom__) - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0, - 1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,
r327634 - Recommit r326946 after reducing CallArgList memory footprint
Author: yaxunl Date: Thu Mar 15 08:25:19 2018 New Revision: 327634 URL: http://llvm.org/viewvc/llvm-project?rev=327634&view=rev Log: Recommit r326946 after reducing CallArgList memory footprint Added: cfe/trunk/test/CodeGenCXX/amdgcn-func-arg.cpp Modified: cfe/trunk/lib/CodeGen/CGAtomic.cpp cfe/trunk/lib/CodeGen/CGCall.cpp cfe/trunk/lib/CodeGen/CGCall.h cfe/trunk/lib/CodeGen/CGClass.cpp cfe/trunk/lib/CodeGen/CGDecl.cpp cfe/trunk/lib/CodeGen/CGExprCXX.cpp cfe/trunk/lib/CodeGen/CGGPUBuiltin.cpp cfe/trunk/lib/CodeGen/CGObjCGNU.cpp cfe/trunk/lib/CodeGen/CGObjCMac.cpp cfe/trunk/lib/CodeGen/CodeGenFunction.h cfe/trunk/lib/CodeGen/ItaniumCXXABI.cpp cfe/trunk/lib/CodeGen/MicrosoftCXXABI.cpp cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl cfe/trunk/test/CodeGenOpenCL/byval.cl Modified: cfe/trunk/lib/CodeGen/CGAtomic.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGAtomic.cpp?rev=327634&r1=327633&r2=327634&view=diff == --- cfe/trunk/lib/CodeGen/CGAtomic.cpp (original) +++ cfe/trunk/lib/CodeGen/CGAtomic.cpp Thu Mar 15 08:25:19 2018 @@ -1160,7 +1160,7 @@ RValue CodeGenFunction::EmitAtomicExpr(A if (UseOptimizedLibcall && Res.getScalarVal()) { llvm::Value *ResVal = Res.getScalarVal(); if (PostOp) { -llvm::Value *LoadVal1 = Args[1].RV.getScalarVal(); +llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal(); ResVal = Builder.CreateBinOp(PostOp, ResVal, LoadVal1); } if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch) Modified: cfe/trunk/lib/CodeGen/CGCall.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=327634&r1=327633&r2=327634&view=diff == --- cfe/trunk/lib/CodeGen/CGCall.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCall.cpp Thu Mar 15 08:25:19 2018 @@ -1040,42 +1040,49 @@ void CodeGenFunction::ExpandTypeFromArgs } void CodeGenFunction::ExpandTypeToArgs( -QualType Ty, RValue RV, llvm::FunctionType *IRFuncTy, +QualType Ty, CallArg Arg, llvm::FunctionType *IRFuncTy, SmallVectorImpl &IRCallArgs, unsigned 
&IRCallArgPos) { auto Exp = getTypeExpansion(Ty, getContext()); if (auto CAExp = dyn_cast(Exp.get())) { -forConstantArrayExpansion(*this, CAExp, RV.getAggregateAddress(), - [&](Address EltAddr) { - RValue EltRV = - convertTempToRValue(EltAddr, CAExp->EltTy, SourceLocation()); - ExpandTypeToArgs(CAExp->EltTy, EltRV, IRFuncTy, IRCallArgs, IRCallArgPos); -}); +Address Addr = Arg.hasLValue() ? Arg.getKnownLValue().getAddress() + : Arg.getKnownRValue().getAggregateAddress(); +forConstantArrayExpansion( +*this, CAExp, Addr, [&](Address EltAddr) { + CallArg EltArg = CallArg( + convertTempToRValue(EltAddr, CAExp->EltTy, SourceLocation()), + CAExp->EltTy); + ExpandTypeToArgs(CAExp->EltTy, EltArg, IRFuncTy, IRCallArgs, + IRCallArgPos); +}); } else if (auto RExp = dyn_cast(Exp.get())) { -Address This = RV.getAggregateAddress(); +Address This = Arg.hasLValue() ? Arg.getKnownLValue().getAddress() + : Arg.getKnownRValue().getAggregateAddress(); for (const CXXBaseSpecifier *BS : RExp->Bases) { // Perform a single step derived-to-base conversion. Address Base = GetAddressOfBaseClass(This, Ty->getAsCXXRecordDecl(), &BS, &BS + 1, /*NullCheckValue=*/false, SourceLocation()); - RValue BaseRV = RValue::getAggregate(Base); + CallArg BaseArg = CallArg(RValue::getAggregate(Base), BS->getType()); // Recurse onto bases. 
- ExpandTypeToArgs(BS->getType(), BaseRV, IRFuncTy, IRCallArgs, + ExpandTypeToArgs(BS->getType(), BaseArg, IRFuncTy, IRCallArgs, IRCallArgPos); } LValue LV = MakeAddrLValue(This, Ty); for (auto FD : RExp->Fields) { - RValue FldRV = EmitRValueForField(LV, FD, SourceLocation()); - ExpandTypeToArgs(FD->getType(), FldRV, IRFuncTy, IRCallArgs, + CallArg FldArg = + CallArg(EmitRValueForField(LV, FD, SourceLocation()), FD->getType()); + ExpandTypeToArgs(FD->getType(), FldArg, IRFuncTy, IRCallArgs, IRCallArgPos); } } else if (isa(Exp.get())) { -ComplexPairTy CV = RV.getComplexVal(); +ComplexPairTy CV = Arg.getKnownRValue().getComplexVal(); IRCallArgs[IRCallArgPos++] = CV.first; IRCallArgs[IRCallArgPos++] = CV.second; } else { assert(isa(Exp.get())); +auto RV = Arg.getKnownRValue(); assert(RV.isScalar() && "Unexpected non-scalar rvalue during struct expansion."); @@ -3417,13 +3424,17 @@ void CodeGenFunction::EmitCall
r334021 - [CUDA][HIP] Do not emit type info when compiling for device
Author: yaxunl Date: Tue Jun 5 08:11:02 2018 New Revision: 334021 URL: http://llvm.org/viewvc/llvm-project?rev=334021&view=rev Log: [CUDA][HIP] Do not emit type info when compiling for device CUDA/HIP does not support RTTI on the device side, therefore there is no point in emitting type info when compiling for device. Emitting type info for device not only clutters the IR with useless global variables, but also causes an undefined symbol at link time since the vtable for cxxabiv1::class_type_info has external linkage. Differential Revision: https://reviews.llvm.org/D47694 Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp cfe/trunk/test/CodeGenCUDA/device-vtable.cu Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=334021&r1=334020&r2=334021&view=diff == --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original) +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Tue Jun 5 08:11:02 2018 @@ -4900,7 +4900,7 @@ llvm::Constant *CodeGenModule::GetAddrOf // Return a bogus pointer if RTTI is disabled, unless it's for EH. // FIXME: should we even be calling this method if RTTI is disabled // and it's not for EH? 
- if (!ForEH && !getLangOpts().RTTI) + if ((!ForEH && !getLangOpts().RTTI) || getLangOpts().CUDAIsDevice) return llvm::Constant::getNullValue(Int8PtrTy); if (ForEH && Ty->isObjCObjectPointerType() && Modified: cfe/trunk/test/CodeGenCUDA/device-vtable.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-vtable.cu?rev=334021&r1=334020&r2=334021&view=diff == --- cfe/trunk/test/CodeGenCUDA/device-vtable.cu (original) +++ cfe/trunk/test/CodeGenCUDA/device-vtable.cu Tue Jun 5 08:11:02 2018 @@ -19,7 +19,9 @@ struct H { //CHECK-HOST: @_ZTV1H = //CHECK-HOST-SAME: @_ZN1H6methodEv //CHECK-DEVICE-NOT: @_ZTV1H = - +//CHECK-DEVICE-NOT: @_ZTVN10__cxxabiv117__class_type_infoE +//CHECK-DEVICE-NOT: @_ZTS1H +//CHECK-DEVICE-NOT: @_ZTI1H struct D { __device__ virtual void method(); }; @@ -27,7 +29,9 @@ struct D { //CHECK-DEVICE: @_ZTV1D //CHECK-DEVICE-SAME: @_ZN1D6methodEv //CHECK-HOST-NOT: @_ZTV1D - +//CHECK-DEVICE-NOT: @_ZTVN10__cxxabiv117__class_type_infoE +//CHECK-DEVICE-NOT: @_ZTS1D +//CHECK-DEVICE-NOT: @_ZTI1D // This is the case with mixed host and device virtual methods. It's // impossible to emit a valid vtable in that case because only host or // only device methods would be available during host or device @@ -45,6 +49,9 @@ struct HD { // CHECK-HOST-NOT: @_ZN2HD8d_methodEv // CHECK-HOST-SAME: null // CHECK-BOTH-SAME: ] +// CHECK-DEVICE-NOT: @_ZTVN10__cxxabiv117__class_type_infoE +// CHECK-DEVICE-NOT: @_ZTS2HD +// CHECK-DEVICE-NOT: @_ZTI2HD void H::method() {} //CHECK-HOST: define void @_ZN1H6methodEv ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r334128 - [HIP] Fix unbundling
Author: yaxunl Date: Wed Jun 6 12:44:10 2018 New Revision: 334128 URL: http://llvm.org/viewvc/llvm-project?rev=334128&view=rev Log: [HIP] Fix unbundling HIP uses clang-offload-bundler to bundle intermediate files for host and different gpu archs together. When a file is unbundled, clang-offload-bundler should be called only once, and the objects for host and different gpu archs should be passed to the next jobs. This is because Driver maintains CachedResults which maps triple-arch string to output files for each job. This patch fixes a bug in Driver::BuildJobsForActionNoCache which uses an incorrect key for CachedResults for HIP, which causes clang-offload-bundler to be called multiple times and incorrect output files to be used. It only affects HIP. Differential Revision: https://reviews.llvm.org/D47555 Added: cfe/trunk/test/Driver/hip-binding.hip Modified: cfe/trunk/lib/Driver/Driver.cpp Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=334128&r1=334127&r2=334128&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Wed Jun 6 12:44:10 2018 @@ -2808,7 +2808,7 @@ public: C.MakeAction(HostAction); UnbundlingHostAction->registerDependentActionInfo( C.getSingleOffloadToolChain(), - /*BoundArch=*/StringRef(), Action::OFK_Host); + /*BoundArch=*/"all", Action::OFK_Host); HostAction = UnbundlingHostAction; } @@ -3880,9 +3880,18 @@ InputInfo Driver::BuildJobsForActionNoCa // Get the unique string identifier for this dependence and cache the // result. 
- CachedResults[{A, GetTriplePlusArchString( -UI.DependentToolChain, BoundArch, -UI.DependentOffloadKind)}] = CurI; + StringRef Arch; + if (TargetDeviceOffloadKind == Action::OFK_HIP) { +if (UI.DependentOffloadKind == Action::OFK_Host) + Arch = "all"; +else + Arch = UI.DependentBoundArch; + } else +Arch = BoundArch; + + CachedResults[{A, GetTriplePlusArchString(UI.DependentToolChain, Arch, +UI.DependentOffloadKind)}] = + CurI; } // Now that we have all the results generated, select the one that should be Added: cfe/trunk/test/Driver/hip-binding.hip URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-binding.hip?rev=334128&view=auto == --- cfe/trunk/test/Driver/hip-binding.hip (added) +++ cfe/trunk/test/Driver/hip-binding.hip Wed Jun 6 12:44:10 2018 @@ -0,0 +1,15 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: touch %t.o +// RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o\ +// RUN: 2>&1 | FileCheck %s + +// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[OBJ1:.*o]]", "[[OBJ2:.*o]]", "[[OBJ3:.*o]]"] +// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ2]]"], output: "[[IMG2:.*out]]" +// CHECK-NOT: offload bundler +// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ3]]"], output: "[[IMG3:.*out]]" +// CHECK-NOT: offload bundler +// CHECK: # "x86_64--linux-gnu" - "GNU::Linker", inputs: ["[[OBJ1]]", "[[IMG2]]", "[[IMG3]]"], output: "a.out" ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r334457 - [CUDA][HIP] Set kernel calling convention before arrange function
Author: yaxunl Date: Mon Jun 11 17:16:33 2018 New Revision: 334457 URL: http://llvm.org/viewvc/llvm-project?rev=334457&view=rev Log: [CUDA][HIP] Set kernel calling convention before arrange function Currently clang set kernel calling convention for CUDA/HIP after arranging function, which causes incorrect kernel function type since it depends on calling convention. This patch moves setting kernel convention before arranging function. Differential Revision: https://reviews.llvm.org/D47733 Added: cfe/trunk/test/CodeGenCUDA/kernel-args.cu Modified: cfe/trunk/lib/CodeGen/CGCall.cpp cfe/trunk/lib/CodeGen/CodeGenModule.cpp cfe/trunk/lib/CodeGen/TargetInfo.cpp cfe/trunk/lib/CodeGen/TargetInfo.h Modified: cfe/trunk/lib/CodeGen/CGCall.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=334457&r1=334456&r2=334457&view=diff == --- cfe/trunk/lib/CodeGen/CGCall.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCall.cpp Mon Jun 11 17:16:33 2018 @@ -255,6 +255,16 @@ CodeGenTypes::arrangeCXXMethodType(const FTP->getCanonicalTypeUnqualified().getAs(), MD); } +/// Set calling convention for CUDA/HIP kernel. +static void setCUDAKernelCallingConvention(CanQualType &FTy, CodeGenModule &CGM, + const FunctionDecl *FD) { + if (FD->hasAttr()) { +const FunctionType *FT = FTy->getAs(); +CGM.getTargetCodeGenInfo().setCUDAKernelCallingConvention(FT); +FTy = FT->getCanonicalTypeUnqualified(); + } +} + /// Arrange the argument and result information for a declaration or /// definition of the given C++ non-static member function. The /// member function must be an ordinary function, i.e. 
not a @@ -264,7 +274,9 @@ CodeGenTypes::arrangeCXXMethodDeclaratio assert(!isa(MD) && "wrong method for constructors!"); assert(!isa(MD) && "wrong method for destructors!"); - CanQual prototype = GetFormalType(MD); + CanQualType FT = GetFormalType(MD).getAs(); + setCUDAKernelCallingConvention(FT, CGM, MD); + auto prototype = FT.getAs(); if (MD->isInstance()) { // The abstract case is perfectly fine. @@ -424,6 +436,7 @@ CodeGenTypes::arrangeFunctionDeclaration CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified(); assert(isa(FTy)); + setCUDAKernelCallingConvention(FTy, CGM, FD); // When declaring a function without a prototype, always use a // non-variadic type. Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=334457&r1=334456&r2=334457&view=diff == --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original) +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Mon Jun 11 17:16:33 2018 @@ -3671,8 +3671,6 @@ void CodeGenModule::EmitGlobalFunctionDe MaybeHandleStaticInExternC(D, Fn); - if (D->hasAttr()) -getTargetCodeGenInfo().setCUDAKernelCallingConvention(Fn); maybeSetTrivialComdat(*D, *Fn); Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=334457&r1=334456&r2=334457&view=diff == --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original) +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Mon Jun 11 17:16:33 2018 @@ -7646,7 +7646,7 @@ public: llvm::Function *BlockInvokeFunc, llvm::Value *BlockLiteral) const override; bool shouldEmitStaticExternCAliases() const override; - void setCUDAKernelCallingConvention(llvm::Function *F) const override; + void setCUDAKernelCallingConvention(const FunctionType *&FT) const override; }; } @@ -7783,8 +7783,9 @@ bool AMDGPUTargetCodeGenInfo::shouldEmit } void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention( -llvm::Function *F) const { - 
F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); +const FunctionType *&FT) const { + FT = getABIInfo().getContext().adjustFunctionType( + FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel)); } //===--===// Modified: cfe/trunk/lib/CodeGen/TargetInfo.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.h?rev=334457&r1=334456&r2=334457&view=diff == --- cfe/trunk/lib/CodeGen/TargetInfo.h (original) +++ cfe/trunk/lib/CodeGen/TargetInfo.h Mon Jun 11 17:16:33 2018 @@ -302,7 +302,7 @@ public: /// as 'used', and having internal linkage. virtual bool shouldEmitStaticExternCAliases() const { return true; } - virtual void setCUDAKernelCallingConvention(llvm::Function *F) const {} + virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {} }; } // namespace
r334561 - [CUDA][HIP] Allow CUDA __global__ functions to have amdgpu kernel attributes
Author: yaxunl Date: Tue Jun 12 16:58:59 2018 New Revision: 334561 URL: http://llvm.org/viewvc/llvm-project?rev=334561&view=rev Log: [CUDA][HIP] Allow CUDA __global__ functions to have amdgpu kernel attributes There are HIP applications e.g. Tensorflow 1.3 using amdgpu kernel attributes, however currently they are only allowed on OpenCL kernel functions. This patch will allow amdgpu kernel attributes to be applied to CUDA/HIP __global__ functions. Differential Revision: https://reviews.llvm.org/D47958 Added: cfe/trunk/test/CodeGenCUDA/amdgpu-kernel-attrs.cu Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td cfe/trunk/lib/Sema/SemaDeclAttr.cpp cfe/trunk/test/SemaCUDA/amdgpu-attrs.cu cfe/trunk/test/SemaOpenCL/invalid-kernel-attrs.cl Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=334561&r1=334560&r2=334561&view=diff == --- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td (original) +++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td Tue Jun 12 16:58:59 2018 @@ -8435,7 +8435,7 @@ def err_reference_pipe_type : Error < "pipes packet types cannot be of reference type">; def err_opencl_no_main : Error<"%select{function|kernel}0 cannot be called 'main'">; def err_opencl_kernel_attr : - Error<"attribute %0 can only be applied to a kernel function">; + Error<"attribute %0 can only be applied to an OpenCL kernel function">; def err_opencl_return_value_with_address_space : Error< "return value cannot be qualified with address space">; def err_opencl_constant_no_init : Error< Modified: cfe/trunk/lib/Sema/SemaDeclAttr.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDeclAttr.cpp?rev=334561&r1=334560&r2=334561&view=diff == --- cfe/trunk/lib/Sema/SemaDeclAttr.cpp (original) +++ cfe/trunk/lib/Sema/SemaDeclAttr.cpp Tue Jun 12 16:58:59 2018 @@ -6468,25 +6468,27 @@ void Sema::ProcessDeclAttributeList(Scop } else if (const auto *A = 
D->getAttr()) { Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A; D->setInvalidDecl(); -} else if (const auto *A = D->getAttr()) { - Diag(D->getLocation(), diag::err_attribute_wrong_decl_type) -<< A << ExpectedKernelFunction; - D->setInvalidDecl(); -} else if (const auto *A = D->getAttr()) { - Diag(D->getLocation(), diag::err_attribute_wrong_decl_type) -<< A << ExpectedKernelFunction; - D->setInvalidDecl(); -} else if (const auto *A = D->getAttr()) { - Diag(D->getLocation(), diag::err_attribute_wrong_decl_type) -<< A << ExpectedKernelFunction; - D->setInvalidDecl(); -} else if (const auto *A = D->getAttr()) { - Diag(D->getLocation(), diag::err_attribute_wrong_decl_type) -<< A << ExpectedKernelFunction; - D->setInvalidDecl(); } else if (const auto *A = D->getAttr()) { Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A; D->setInvalidDecl(); +} else if (!D->hasAttr()) { + if (const auto *A = D->getAttr()) { +Diag(D->getLocation(), diag::err_attribute_wrong_decl_type) +<< A << ExpectedKernelFunction; +D->setInvalidDecl(); + } else if (const auto *A = D->getAttr()) { +Diag(D->getLocation(), diag::err_attribute_wrong_decl_type) +<< A << ExpectedKernelFunction; +D->setInvalidDecl(); + } else if (const auto *A = D->getAttr()) { +Diag(D->getLocation(), diag::err_attribute_wrong_decl_type) +<< A << ExpectedKernelFunction; +D->setInvalidDecl(); + } else if (const auto *A = D->getAttr()) { +Diag(D->getLocation(), diag::err_attribute_wrong_decl_type) +<< A << ExpectedKernelFunction; +D->setInvalidDecl(); + } } } } Added: cfe/trunk/test/CodeGenCUDA/amdgpu-kernel-attrs.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/amdgpu-kernel-attrs.cu?rev=334561&view=auto == --- cfe/trunk/test/CodeGenCUDA/amdgpu-kernel-attrs.cu (added) +++ cfe/trunk/test/CodeGenCUDA/amdgpu-kernel-attrs.cu Tue Jun 12 16:58:59 2018 @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ +// RUN: -fcuda-is-device -emit-llvm -o - %s | FileCheck %s +// RUN: 
%clang_cc1 -triple nvptx \ +// RUN: -fcuda-is-device -emit-llvm -o - %s | FileCheck %s \ +// RUN: -check-prefix=NAMD +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \ +// RUN: -verify -o - %s | FileCheck -check-prefix=NAMD %s + +#include "Inputs/cuda.h" + +__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics +__global__ void flat_work_group_size_32_64() { +// CHECK: define amdgpu_kernel void @_Z26fl
r334837 - [NFC] Add CreateMemTempWithoutCast and CreateTempAllocaWithoutCast
Author: yaxunl Date: Fri Jun 15 08:33:22 2018 New Revision: 334837 URL: http://llvm.org/viewvc/llvm-project?rev=334837&view=rev Log: [NFC] Add CreateMemTempWithoutCast and CreateTempAllocaWithoutCast This is partial re-commit of r332982 Modified: cfe/trunk/lib/CodeGen/CGCall.cpp cfe/trunk/lib/CodeGen/CGExpr.cpp cfe/trunk/lib/CodeGen/CodeGenFunction.h Modified: cfe/trunk/lib/CodeGen/CGCall.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=334837&r1=334836&r2=334837&view=diff == --- cfe/trunk/lib/CodeGen/CGCall.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCall.cpp Fri Jun 15 08:33:22 2018 @@ -3901,9 +3901,8 @@ RValue CodeGenFunction::EmitCall(const C assert(NumIRArgs == 1); if (!I->isAggregate()) { // Make a temporary alloca to pass the argument. -Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), - "indirect-arg-temp", /*Alloca=*/nullptr, - /*Cast=*/false); +Address Addr = CreateMemTempWithoutCast( +I->Ty, ArgInfo.getIndirectAlign(), "indirect-arg-temp"); IRCallArgs[FirstIRArg] = Addr.getPointer(); I->copyInto(*this, Addr); @@ -3948,9 +3947,8 @@ RValue CodeGenFunction::EmitCall(const C } if (NeedCopy) { // Create an aligned temporary, and copy to it. - Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), - "byval-temp", /*Alloca=*/nullptr, - /*Cast=*/false); + Address AI = CreateMemTempWithoutCast( + I->Ty, ArgInfo.getIndirectAlign(), "byval-temp"); IRCallArgs[FirstIRArg] = AI.getPointer(); I->copyInto(*this, AI); } else { Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=334837&r1=334836&r2=334837&view=diff == --- cfe/trunk/lib/CodeGen/CGExpr.cpp (original) +++ cfe/trunk/lib/CodeGen/CGExpr.cpp Fri Jun 15 08:33:22 2018 @@ -61,21 +61,30 @@ llvm::Value *CodeGenFunction::EmitCastTo /// CreateTempAlloca - This creates a alloca and inserts it into the entry /// block. 
+Address CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty, + CharUnits Align, + const Twine &Name, + llvm::Value *ArraySize) { + auto Alloca = CreateTempAlloca(Ty, Name, ArraySize); + Alloca->setAlignment(Align.getQuantity()); + return Address(Alloca, Align); +} + +/// CreateTempAlloca - This creates a alloca and inserts it into the entry +/// block. The alloca is casted to default address space if necessary. Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, const Twine &Name, llvm::Value *ArraySize, - Address *AllocaAddr, - bool CastToDefaultAddrSpace) { - auto Alloca = CreateTempAlloca(Ty, Name, ArraySize); - Alloca->setAlignment(Align.getQuantity()); + Address *AllocaAddr) { + auto Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize); if (AllocaAddr) -*AllocaAddr = Address(Alloca, Align); - llvm::Value *V = Alloca; +*AllocaAddr = Alloca; + llvm::Value *V = Alloca.getPointer(); // Alloca always returns a pointer in alloca address space, which may // be different from the type defined by the language. For example, // in C++ the auto variables are in the default address space. Therefore // cast alloca to the default address space when necessary. - if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) { + if (getASTAllocaAddressSpace() != LangAS::Default) { auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default); llvm::IRBuilderBase::InsertPointGuard IPG(Builder); // When ArraySize is nullptr, alloca is inserted at AllocaInsertPt, @@ -128,19 +137,26 @@ Address CodeGenFunction::CreateIRTemp(Qu } Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name, - Address *Alloca, - bool CastToDefaultAddrSpace) { + Address *Alloca) { // FIXME: Should we prefer the preferred type alignment here? 
- return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name, Alloca, - CastToDefaultAddrSpace); + return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name, Alloca); } Address CodeGenFunction::CreateMemTem
r334879 - Call CreateTempAllocaWithoutCast for ActiveFlag
Author: yaxunl Date: Fri Jun 15 18:20:52 2018 New Revision: 334879 URL: http://llvm.org/viewvc/llvm-project?rev=334879&view=rev Log: Call CreateTempAllocaWithoutCast for ActiveFlag This is partial re-commit of r332982. Modified: cfe/trunk/lib/CodeGen/CGCleanup.cpp Modified: cfe/trunk/lib/CodeGen/CGCleanup.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCleanup.cpp?rev=334879&r1=334878&r2=334879&view=diff == --- cfe/trunk/lib/CodeGen/CGCleanup.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCleanup.cpp Fri Jun 15 18:20:52 2018 @@ -283,8 +283,8 @@ void EHScopeStack::popNullFixups() { void CodeGenFunction::initFullExprCleanup() { // Create a variable to decide whether the cleanup needs to be run. - Address active = CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(), -"cleanup.cond"); + Address active = CreateTempAllocaWithoutCast( + Builder.getInt1Ty(), CharUnits::One(), "cleanup.cond"); // Initialize it to false at a site that's guaranteed to be run // before each evaluation. ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r334886 - Add RUN line for amdgcn to lit test conditional-temporaries.cpp
Author: yaxunl Date: Sat Jun 16 05:28:51 2018 New Revision: 334886 URL: http://llvm.org/viewvc/llvm-project?rev=334886&view=rev Log: Add RUN line for amdgcn to lit test conditional-temporaries.cpp This is partial re-commit of r332982. Modified: cfe/trunk/test/CodeGenCXX/conditional-temporaries.cpp Modified: cfe/trunk/test/CodeGenCXX/conditional-temporaries.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/conditional-temporaries.cpp?rev=334886&r1=334885&r2=334886&view=diff == --- cfe/trunk/test/CodeGenCXX/conditional-temporaries.cpp (original) +++ cfe/trunk/test/CodeGenCXX/conditional-temporaries.cpp Sat Jun 16 05:28:51 2018 @@ -1,4 +1,6 @@ +// REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-apple-darwin9 -O3 | FileCheck %s +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=amdgcn-amd-amdhsa -O3 | FileCheck %s namespace { ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r344057 - [CUDA][HIP] Fix ShouldDeleteSpecialMember for inherited constructors
Author: yaxunl Date: Tue Oct 9 08:53:14 2018 New Revision: 344057 URL: http://llvm.org/viewvc/llvm-project?rev=344057&view=rev Log: [CUDA][HIP] Fix ShouldDeleteSpecialMember for inherited constructors ShouldDeleteSpecialMember is called upon inherited constructors. It calls inferCUDATargetForImplicitSpecialMember. Normally the special member enum passed to ShouldDeleteSpecialMember matches the constructor. However this is not true when an inherited constructor is passed, where DefaultConstructor is passed to treat the inherited constructor as DefaultConstructor. However inferCUDATargetForImplicitSpecialMember expects the special member enum argument to match the constructor, which results in an assertion when this expectation is not satisfied. This patch checks whether the constructor is inherited. If true it will get the real special member enum for the constructor and pass it to inferCUDATargetForImplicitSpecialMember. Differential Revision: https://reviews.llvm.org/D51809 Added: cfe/trunk/test/SemaCUDA/implicit-member-target-inherited.cu cfe/trunk/test/SemaCUDA/inherited-ctor.cu Modified: cfe/trunk/lib/Sema/SemaDeclCXX.cpp Modified: cfe/trunk/lib/Sema/SemaDeclCXX.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDeclCXX.cpp?rev=344057&r1=344056&r2=344057&view=diff == --- cfe/trunk/lib/Sema/SemaDeclCXX.cpp (original) +++ cfe/trunk/lib/Sema/SemaDeclCXX.cpp Tue Oct 9 08:53:14 2018 @@ -7222,8 +7222,17 @@ bool Sema::ShouldDeleteSpecialMember(CXX if (getLangOpts().CUDA) { // We should delete the special member in CUDA mode if target inference // failed. -return inferCUDATargetForImplicitSpecialMember(RD, CSM, MD, SMI.ConstArg, - Diagnose); +// For inherited constructors (non-null ICI), CSM may be passed so that MD +// is treated as certain special member, which may not reflect what special +// member MD really is. However inferCUDATargetForImplicitSpecialMember +// expects CSM to match MD, therefore recalculate CSM. 
+assert(ICI || CSM == getSpecialMember(MD)); +auto RealCSM = CSM; +if (ICI) + RealCSM = getSpecialMember(MD); + +return inferCUDATargetForImplicitSpecialMember(RD, RealCSM, MD, + SMI.ConstArg, Diagnose); } return false; Added: cfe/trunk/test/SemaCUDA/implicit-member-target-inherited.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/implicit-member-target-inherited.cu?rev=344057&view=auto == --- cfe/trunk/test/SemaCUDA/implicit-member-target-inherited.cu (added) +++ cfe/trunk/test/SemaCUDA/implicit-member-target-inherited.cu Tue Oct 9 08:53:14 2018 @@ -0,0 +1,205 @@ +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s -Wno-defaulted-function-deleted + +#include "Inputs/cuda.h" + +//-- +// Test 1: infer inherited default ctor to be host. + +struct A1_with_host_ctor { + A1_with_host_ctor() {} +}; +// expected-note@-3 {{candidate constructor (the implicit copy constructor) not viable}} +// expected-note@-4 {{candidate constructor (the implicit move constructor) not viable}} + +// The inherited default constructor is inferred to be host, so we'll encounter +// an error when calling it from a __device__ function, but not from a __host__ +// function. +struct B1_with_implicit_default_ctor : A1_with_host_ctor { + using A1_with_host_ctor::A1_with_host_ctor; +}; + +// expected-note@-4 {{call to __host__ function from __device__}} +// expected-note@-5 {{candidate constructor (the implicit copy constructor) not viable}} +// expected-note@-6 {{candidate constructor (the implicit move constructor) not viable}} +// expected-note@-6 2{{constructor from base class 'A1_with_host_ctor' inherited here}} + +void hostfoo() { + B1_with_implicit_default_ctor b; +} + +__device__ void devicefoo() { + B1_with_implicit_default_ctor b; // expected-error {{no matching constructor}} +} + +//-- +// Test 2: infer inherited default ctor to be device. 
+ +struct A2_with_device_ctor { + __device__ A2_with_device_ctor() {} +}; +// expected-note@-3 {{candidate constructor (the implicit copy constructor) not viable}} +// expected-note@-4 {{candidate constructor (the implicit move constructor) not viable}} + +struct B2_with_implicit_default_ctor : A2_with_device_ctor { + using A2_with_device_ctor::A2_with_device_ctor; +}; + +// expected-note@-4 {{call to __device__ function from __host__}} +// expected-note@-5 {{candidate constructor (the implicit copy constructor) not viable}} +// expected-note@-6 {{candidate constructor (the implicit move constructor) not viable}} +// expected-note@-6 2{{constructor from b
r344630 - Disable code object version 3 for HIP toolchain
Author: yaxunl Date: Tue Oct 16 10:36:23 2018 New Revision: 344630 URL: http://llvm.org/viewvc/llvm-project?rev=344630&view=rev Log: Disable code object version 3 for HIP toolchain AMDGPU backend will switch to code object version 3 by default. Since HIP runtime is not ready, disable it until the runtime is ready. Differential Revision: https://reviews.llvm.org/D53325 Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp cfe/trunk/test/Driver/hip-toolchain-no-rdc.hip cfe/trunk/test/Driver/hip-toolchain-rdc.hip Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/HIP.cpp?rev=344630&r1=344629&r2=344630&view=diff == --- cfe/trunk/lib/Driver/ToolChains/HIP.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/HIP.cpp Tue Oct 16 10:36:23 2018 @@ -154,7 +154,7 @@ const char *AMDGCN::Linker::constructLlc llvm::StringRef OutputFilePrefix, const char *InputFileName) const { // Construct llc command. ArgStringList LlcArgs{InputFileName, "-mtriple=amdgcn-amd-amdhsa", -"-filetype=obj", +"-filetype=obj", "-mattr=-code-object-v3", Args.MakeArgString("-mcpu=" + SubArchName), "-o"}; std::string LlcOutputFileName = C.getDriver().GetTemporaryPath(OutputFilePrefix, "o"); Modified: cfe/trunk/test/Driver/hip-toolchain-no-rdc.hip URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-toolchain-no-rdc.hip?rev=344630&r1=344629&r2=344630&view=diff == --- cfe/trunk/test/Driver/hip-toolchain-no-rdc.hip (original) +++ cfe/trunk/test/Driver/hip-toolchain-no-rdc.hip Tue Oct 16 10:36:23 2018 @@ -32,7 +32,9 @@ // CHECK-SAME: "-o" [[OPT_BC_DEV_A_803:".*-gfx803-optimized.*bc"]] // CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_A_803]] "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: "-filetype=obj" "-mcpu=gfx803" "-o" [[OBJ_DEV_A_803:".*-gfx803-.*o"]] +// CHECK-SAME: "-filetype=obj" +// CHECK-SAME: "-mattr=-code-object-v3" +// CHECK-SAME: "-mcpu=gfx803" "-o" [[OBJ_DEV_A_803:".*-gfx803-.*o"]] // CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" 
"--no-undefined" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_A_803:.*out]]" [[OBJ_DEV_A_803]] @@ -57,7 +59,9 @@ // CHECK-SAME: "-o" [[OPT_BC_DEV_A_900:".*-gfx900-optimized.*bc"]] // CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_A_900]] "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: "-filetype=obj" "-mcpu=gfx900" "-o" [[OBJ_DEV_A_900:".*-gfx900-.*o"]] +// CHECK-SAME: "-filetype=obj" +// CHECK-SAME: "-mattr=-code-object-v3" +// CHECK-SAME: "-mcpu=gfx900" "-o" [[OBJ_DEV_A_900:".*-gfx900-.*o"]] // CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_A_900:.*out]]" [[OBJ_DEV_A_900]] @@ -97,7 +101,9 @@ // CHECK-SAME: "-o" [[OPT_BC_DEV_B_803:".*-gfx803-optimized.*bc"]] // CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_B_803]] "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: "-filetype=obj" "-mcpu=gfx803" "-o" [[OBJ_DEV_B_803:".*-gfx803-.*o"]] +// CHECK-SAME: "-filetype=obj" +// CHECK-SAME: "-mattr=-code-object-v3" +// CHECK-SAME: "-mcpu=gfx803" "-o" [[OBJ_DEV_B_803:".*-gfx803-.*o"]] // CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_B_803:.*out]]" [[OBJ_DEV_B_803]] @@ -122,7 +128,9 @@ // CHECK-SAME: "-o" [[OPT_BC_DEV_B_900:".*-gfx900-optimized.*bc"]] // CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_B_900]] "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: "-filetype=obj" "-mcpu=gfx900" "-o" [[OBJ_DEV_B_900:".*-gfx900-.*o"]] +// CHECK-SAME: "-filetype=obj" +// CHECK-SAME: "-mattr=-code-object-v3" +// CHECK-SAME: "-mcpu=gfx900" "-o" [[OBJ_DEV_B_900:".*-gfx900-.*o"]] // CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_B_900:.*out]]" [[OBJ_DEV_B_900]] Modified: cfe/trunk/test/Driver/hip-toolchain-rdc.hip URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-toolchain-rdc.hip?rev=344630&r1=344629&r2=344630&view=diff == --- cfe/trunk/test/Driver/hip-toolchain-rdc.hip (original) +++ cfe/trunk/test/Driver/hip-toolchain-rdc.hip Tue Oct 16 10:36:23 2018 @@ -35,7 +35,9 
@@ // CHECK-SAME: "-o" [[OPT_BC_DEV1:".*-gfx803-optimized.*bc"]] // CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV1]] "-mtriple=amdgcn-amd-amdhsa" -// CHECK-SAME: "-filetype=obj" "-mcpu=gfx803" "-o" [[OBJ_DEV1:".*-gfx803-.*o"]] +// CHECK-SAME: "-filetype=obj" +// CHECK-SAME: "-mattr=-code-object-v3" +// CHECK-SAME: "-mcpu=gfx803" "-o" [[OBJ_DEV1:".*-gfx803-.*o"]] // CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV1:.*out]]" [[OBJ_DEV1]] @@ -61,7 +63,9 @@ // CHECK-SAME: "-o" [[OPT_BC_DEV2:".*-gfx900-optimized.*bc"]] // CHECK: [[LLC]] [[OPT_BC_DEV2]] "-mtriple=amdgcn-am
r344665 - AMDGPU: add __builtin_amdgcn_update_dpp
Author: yaxunl Date: Tue Oct 16 19:32:26 2018 New Revision: 344665 URL: http://llvm.org/viewvc/llvm-project?rev=344665&view=rev Log: AMDGPU: add __builtin_amdgcn_update_dpp Emit llvm.amdgcn.update.dpp for both __builtin_amdgcn_mov_dpp and __builtin_amdgcn_update_dpp. The first argument to llvm.amdgcn.update.dpp will be undef for __builtin_amdgcn_mov_dpp. Differential Revision: https://reviews.llvm.org/D52320 Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=344665&r1=344664&r2=344665&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Tue Oct 16 19:32:26 2018 @@ -122,6 +122,7 @@ TARGET_BUILTIN(__builtin_amdgcn_fracth, TARGET_BUILTIN(__builtin_amdgcn_classh, "bhi", "nc", "16-bit-insts") TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n", "s-memrealtime") TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nc", "dpp") +TARGET_BUILTIN(__builtin_amdgcn_update_dpp, "iiiIiIiIiIb", "nc", "dpp") TARGET_BUILTIN(__builtin_amdgcn_s_dcache_wb, "v", "n", "vi-insts") //===--===// Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=344665&r1=344664&r2=344665&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Oct 16 19:32:26 2018 @@ -11347,12 +11347,16 @@ Value *CodeGenFunction::EmitAMDGPUBuilti case AMDGPU::BI__builtin_amdgcn_ds_swizzle: return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); - case AMDGPU::BI__builtin_amdgcn_mov_dpp: { -llvm::SmallVector Args; -for (unsigned I = 0; I != 5; ++I) + case AMDGPU::BI__builtin_amdgcn_mov_dpp: + case 
AMDGPU::BI__builtin_amdgcn_update_dpp: { +llvm::SmallVector Args; +for (unsigned I = 0; I != E->getNumArgs(); ++I) Args.push_back(EmitScalarExpr(E->getArg(I))); -Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp, -Args[0]->getType()); +assert(Args.size() == 5 || Args.size() == 6); +if (Args.size() == 5) + Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType())); +Value *F = +CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); return Builder.CreateCall(F, Args); } case AMDGPU::BI__builtin_amdgcn_div_fixup: Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl?rev=344665&r1=344664&r2=344665&view=diff == --- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl Tue Oct 16 19:32:26 2018 @@ -90,12 +90,19 @@ void test_s_dcache_wb() } // CHECK-LABEL: @test_mov_dpp -// CHECK: call i32 @llvm.amdgcn.mov.dpp.i32(i32 %src, i32 0, i32 0, i32 0, i1 false) +// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %src, i32 0, i32 0, i32 0, i1 false) void test_mov_dpp(global int* out, int src) { *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false); } +// CHECK-LABEL: @test_update_dpp +// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false) +void test_update_dpp(global int* out, int arg1, int arg2) +{ + *out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false); +} + // CHECK-LABEL: @test_ds_fadd // CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false) void test_ds_faddf(local float *out, float src) { Modified: cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error.cl?rev=344665&r1=344664&r2=344665&view=diff == --- cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error.cl (original) +++ 
cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error.cl Tue Oct 16 19:32:26 2018 @@ -102,6 +102,15 @@ void test_mov_dpp2(global int* out, int *out = __builtin_amdgcn_mov_dpp(a, 0, 0, 0, e); // expected-error {{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}} } +void test_update_dpp2(global int* out, int a, int b, int c, int d, int e, bool f) +{ + *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, false); + *out = __builtin_amdgcn_update_dpp(a, 0, c, 0, 0, false); // expected-er
r344996 - Add gfx904 and gfx906 to GPU Arch
Author: yaxunl Date: Mon Oct 22 19:05:31 2018 New Revision: 344996 URL: http://llvm.org/viewvc/llvm-project?rev=344996&view=rev Log: Add gfx904 and gfx906 to GPU Arch Differential Revision: https://reviews.llvm.org/D53472 Modified: cfe/trunk/include/clang/Basic/Cuda.h cfe/trunk/lib/Basic/Cuda.cpp cfe/trunk/lib/Basic/Targets/NVPTX.cpp Modified: cfe/trunk/include/clang/Basic/Cuda.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Cuda.h?rev=344996&r1=344995&r2=344996&view=diff == --- cfe/trunk/include/clang/Basic/Cuda.h (original) +++ cfe/trunk/include/clang/Basic/Cuda.h Mon Oct 22 19:05:31 2018 @@ -62,6 +62,8 @@ enum class CudaArch { GFX810, GFX900, GFX902, + GFX904, + GFX906, LAST, }; const char *CudaArchToString(CudaArch A); Modified: cfe/trunk/lib/Basic/Cuda.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Cuda.cpp?rev=344996&r1=344995&r2=344996&view=diff == --- cfe/trunk/lib/Basic/Cuda.cpp (original) +++ cfe/trunk/lib/Basic/Cuda.cpp Mon Oct 22 19:05:31 2018 @@ -90,6 +90,10 @@ const char *CudaArchToString(CudaArch A) return "gfx900"; case CudaArch::GFX902: // TBA return "gfx902"; + case CudaArch::GFX904: // TBA +return "gfx904"; + case CudaArch::GFX906: // TBA +return "gfx906"; } llvm_unreachable("invalid enum"); } @@ -124,6 +128,8 @@ CudaArch StringToCudaArch(llvm::StringRe .Case("gfx810", CudaArch::GFX810) .Case("gfx900", CudaArch::GFX900) .Case("gfx902", CudaArch::GFX902) + .Case("gfx904", CudaArch::GFX904) + .Case("gfx906", CudaArch::GFX906) .Default(CudaArch::UNKNOWN); } @@ -233,6 +239,8 @@ CudaVirtualArch VirtualArchForCudaArch(C case CudaArch::GFX810: case CudaArch::GFX900: case CudaArch::GFX902: + case CudaArch::GFX904: + case CudaArch::GFX906: return CudaVirtualArch::COMPUTE_AMDGCN; } llvm_unreachable("invalid enum"); @@ -277,6 +285,8 @@ CudaVersion MinVersionForCudaArch(CudaAr case CudaArch::GFX810: case CudaArch::GFX900: case CudaArch::GFX902: + case CudaArch::GFX904: + case CudaArch::GFX906: return 
CudaVersion::CUDA_70; } llvm_unreachable("invalid enum"); Modified: cfe/trunk/lib/Basic/Targets/NVPTX.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/NVPTX.cpp?rev=344996&r1=344995&r2=344996&view=diff == --- cfe/trunk/lib/Basic/Targets/NVPTX.cpp (original) +++ cfe/trunk/lib/Basic/Targets/NVPTX.cpp Mon Oct 22 19:05:31 2018 @@ -188,6 +188,8 @@ void NVPTXTargetInfo::getTargetDefines(c case CudaArch::GFX810: case CudaArch::GFX900: case CudaArch::GFX902: + case CudaArch::GFX904: + case CudaArch::GFX906: case CudaArch::LAST: break; case CudaArch::UNKNOWN: ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r352488 - [CUDA][HIP] Do not diagnose use of _Float16
Author: yaxunl Date: Tue Jan 29 05:20:23 2019 New Revision: 352488 URL: http://llvm.org/viewvc/llvm-project?rev=352488&view=rev Log: [CUDA][HIP] Do not diagnose use of _Float16 r352221 caused regressions in CUDA/HIP since device function may use _Float16 whereas host does not support it. In this case host compilation should not diagnose usage of _Float16 in device functions or variables. For now just do not diagnose _Float16 for CUDA/HIP. In the future we should have more precise check. Differential Revision: https://reviews.llvm.org/D57369 Added: cfe/trunk/test/SemaCUDA/float16.cu Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp cfe/trunk/lib/Sema/SemaType.cpp Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/LiteralSupport.cpp?rev=352488&r1=352487&r2=352488&view=diff == --- cfe/trunk/lib/Lex/LiteralSupport.cpp (original) +++ cfe/trunk/lib/Lex/LiteralSupport.cpp Tue Jan 29 05:20:23 2019 @@ -616,8 +616,11 @@ NumericLiteralParser::NumericLiteralPars if (isHalf || isFloat || isLong || isFloat128) break; // HF, FF, LF, QF invalid. - if (PP.getTargetInfo().hasFloat16Type() && s + 2 < ThisTokEnd && - s[1] == '1' && s[2] == '6') { + // CUDA host and device may have different _Float16 support, therefore + // allows f16 literals to avoid false alarm. + // ToDo: more precise check for CUDA. + if ((PP.getTargetInfo().hasFloat16Type() || PP.getLangOpts().CUDA) && + s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') { s += 2; // success, eat up 2 characters. 
isFloat16 = true; continue; Modified: cfe/trunk/lib/Sema/SemaType.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaType.cpp?rev=352488&r1=352487&r2=352488&view=diff == --- cfe/trunk/lib/Sema/SemaType.cpp (original) +++ cfe/trunk/lib/Sema/SemaType.cpp Tue Jan 29 05:20:23 2019 @@ -1442,7 +1442,10 @@ static QualType ConvertDeclSpecToType(Ty Result = Context.Int128Ty; break; case DeclSpec::TST_float16: -if (!S.Context.getTargetInfo().hasFloat16Type()) +// CUDA host and device may have different _Float16 support, therefore +// do not diagnose _Float16 usage to avoid false alarm. +// ToDo: more precise diagnostics for CUDA. +if (!S.Context.getTargetInfo().hasFloat16Type() && !S.getLangOpts().CUDA) S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported) << "_Float16"; Result = Context.Float16Ty; Added: cfe/trunk/test/SemaCUDA/float16.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/float16.cu?rev=352488&view=auto == --- cfe/trunk/test/SemaCUDA/float16.cu (added) +++ cfe/trunk/test/SemaCUDA/float16.cu Tue Jan 29 05:20:23 2019 @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify %s +// expected-no-diagnostics +#include "Inputs/cuda.h" + +__device__ void f(_Float16 x); + +__device__ _Float16 x = 1.0f16; ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r352620 - [HIP] Fix size_t for MSVC environment
Author: yaxunl Date: Wed Jan 30 04:26:54 2019 New Revision: 352620 URL: http://llvm.org/viewvc/llvm-project?rev=352620&view=rev Log: [HIP] Fix size_t for MSVC environment In 64 bit MSVC environment size_t is defined as unsigned long long. In single source language like HIP, data layout should be consistent in device and host compilation, therefore copy data layout controlling fields from Aux target for AMDGPU target. Differential Revision: https://reviews.llvm.org/D56318 Added: cfe/trunk/test/SemaCUDA/amdgpu-size_t.cu Modified: cfe/trunk/include/clang/Basic/TargetInfo.h cfe/trunk/lib/Basic/TargetInfo.cpp cfe/trunk/lib/Basic/Targets/AMDGPU.cpp cfe/trunk/lib/Basic/Targets/AMDGPU.h cfe/trunk/lib/Frontend/CompilerInstance.cpp Modified: cfe/trunk/include/clang/Basic/TargetInfo.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/TargetInfo.h?rev=352620&r1=352619&r2=352620&view=diff == --- cfe/trunk/include/clang/Basic/TargetInfo.h (original) +++ cfe/trunk/include/clang/Basic/TargetInfo.h Wed Jan 30 04:26:54 2019 @@ -48,22 +48,10 @@ class SourceManager; namespace Builtin { struct Info; } -/// Exposes information about the current target. -/// -class TargetInfo : public RefCountedBase { - std::shared_ptr TargetOpts; - llvm::Triple Triple; -protected: - // Target values set by the ctor of the actual target implementation. Default - // values are specified by the TargetInfo constructor. - bool BigEndian; - bool TLSSupported; - bool VLASupported; - bool NoAsmVariants; // True if {|} are normal characters. - bool HasLegalHalfType; // True if the backend supports operations on the half - // LLVM IR type. - bool HasFloat128; - bool HasFloat16; +/// Fields controlling how types are laid out in memory; these may need to +/// be copied for targets like AMDGPU that base their ABIs on an auxiliary +/// CPU target. 
+struct TransferrableTargetInfo { unsigned char PointerWidth, PointerAlign; unsigned char BoolWidth, BoolAlign; unsigned char IntWidth, IntAlign; @@ -104,15 +92,92 @@ protected: unsigned char SuitableAlign; unsigned char DefaultAlignForAttributeAligned; unsigned char MinGlobalAlign; - unsigned char MaxAtomicPromoteWidth, MaxAtomicInlineWidth; + + unsigned short NewAlign; unsigned short MaxVectorAlign; unsigned short MaxTLSAlign; + + const llvm::fltSemantics *HalfFormat, *FloatFormat, *DoubleFormat, +*LongDoubleFormat, *Float128Format; + + ///=== Target Data Type Query Methods ---===// + enum IntType { +NoInt = 0, +SignedChar, +UnsignedChar, +SignedShort, +UnsignedShort, +SignedInt, +UnsignedInt, +SignedLong, +UnsignedLong, +SignedLongLong, +UnsignedLongLong + }; + + enum RealType { +NoFloat = 255, +Float = 0, +Double, +LongDouble, +Float128 + }; +protected: + IntType SizeType, IntMaxType, PtrDiffType, IntPtrType, WCharType, + WIntType, Char16Type, Char32Type, Int64Type, SigAtomicType, + ProcessIDType; + + /// Whether Objective-C's built-in boolean type should be signed char. + /// + /// Otherwise, when this flag is not set, the normal built-in boolean type is + /// used. + unsigned UseSignedCharForObjCBool : 1; + + /// Control whether the alignment of bit-field types is respected when laying + /// out structures. If true, then the alignment of the bit-field type will be + /// used to (a) impact the alignment of the containing structure, and (b) + /// ensure that the individual bit-field will not straddle an alignment + /// boundary. + unsigned UseBitFieldTypeAlignment : 1; + + /// Whether zero length bitfields (e.g., int : 0;) force alignment of + /// the next bitfield. + /// + /// If the alignment of the zero length bitfield is greater than the member + /// that follows it, `bar', `bar' will be aligned as the type of the + /// zero-length bitfield. + unsigned UseZeroLengthBitfieldAlignment : 1; + + /// Whether explicit bit field alignment attributes are honored. 
+ unsigned UseExplicitBitFieldAlignment : 1; + + /// If non-zero, specifies a fixed alignment value for bitfields that follow + /// zero length bitfield, regardless of the zero length bitfield type. + unsigned ZeroLengthBitfieldBoundary; +}; + +/// Exposes information about the current target. +/// +class TargetInfo : public virtual TransferrableTargetInfo, + public RefCountedBase { + std::shared_ptr TargetOpts; + llvm::Triple Triple; +protected: + // Target values set by the ctor of the actual target implementation. Default + // values are specified by the TargetInfo constructor. + bool BigEndian; + bool TLSSupported; + bool VLASupported; + bool NoAsmVariants; // True if {|} are normal characters. + bool HasLegalHalfType; // True if the backend supports operations on the half +
r346413 - Fix bitcast to address space cast for coerced load/stores
Author: yaxunl Date: Thu Nov 8 08:55:46 2018 New Revision: 346413 URL: http://llvm.org/viewvc/llvm-project?rev=346413&view=rev Log: Fix bitcast to address space cast for coerced load/stores Coerced loads/stores through memory do not take into account potential address space differences when they create their bitcasts. Patch by David Salinas. Differential Revision: https://reviews.llvm.org/D53780 Added: cfe/trunk/test/CodeGenCXX/address-space-cast-coerce.cpp Modified: cfe/trunk/lib/CodeGen/CGCall.cpp Modified: cfe/trunk/lib/CodeGen/CGCall.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=346413&r1=346412&r2=346413&view=diff == --- cfe/trunk/lib/CodeGen/CGCall.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCall.cpp Thu Nov 8 08:55:46 2018 @@ -1253,8 +1253,8 @@ static llvm::Value *CreateCoercedLoad(Ad // Otherwise do coercion through memory. This is stupid, but simple. Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment()); - Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy); - Address SrcCasted = CGF.Builder.CreateBitCast(Src, CGF.AllocaInt8PtrTy); + Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty); + Address SrcCasted = CGF.Builder.CreateElementBitCast(Src,CGF.Int8Ty); CGF.Builder.CreateMemCpy(Casted, SrcCasted, llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize), false); @@ -1335,8 +1335,8 @@ static void CreateCoercedStore(llvm::Val // to that information. 
Address Tmp = CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment()); CGF.Builder.CreateStore(Src, Tmp); -Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy); -Address DstCasted = CGF.Builder.CreateBitCast(Dst, CGF.AllocaInt8PtrTy); +Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty); +Address DstCasted = CGF.Builder.CreateElementBitCast(Dst,CGF.Int8Ty); CGF.Builder.CreateMemCpy(DstCasted, Casted, llvm::ConstantInt::get(CGF.IntPtrTy, DstSize), false); Added: cfe/trunk/test/CodeGenCXX/address-space-cast-coerce.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/address-space-cast-coerce.cpp?rev=346413&view=auto == --- cfe/trunk/test/CodeGenCXX/address-space-cast-coerce.cpp (added) +++ cfe/trunk/test/CodeGenCXX/address-space-cast-coerce.cpp Thu Nov 8 08:55:46 2018 @@ -0,0 +1,53 @@ +// RUN: %clang_cc1 %s -triple=amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s + +template struct my_vector_base; + +template +struct my_vector_base { +typedef T Native_vec_ __attribute__((ext_vector_type(1))); + +union { +Native_vec_ data; +struct { +T x; +}; +}; +}; + +template +struct my_vector_type : public my_vector_base { +using my_vector_base::data; +using typename my_vector_base::Native_vec_; + +template< typename U> +my_vector_type(U x) noexcept +{ +for (auto i = 0u; i != rank; ++i) data[i] = x; +} +my_vector_type& operator+=(const my_vector_type& x) noexcept +{ +data += x.data; +return *this; +} +}; + +template +inline +my_vector_type operator+( +const my_vector_type& x, const my_vector_type& y) noexcept +{ +return my_vector_type{x} += y; +} + +using char1 = my_vector_type; + +int mane() { + +char1 f1{1}; +char1 f2{1}; + +// CHECK: %[[a:[^ ]+]] = addrspacecast i16 addrspace(5)* %{{[^ ]+}} to i16* +// CHECK: %[[a:[^ ]+]] = addrspacecast %{{[^ ]+}} addrspace(5)* %{{[^ ]+}} to %{{[^ ]+}} + +char1 f3 = f1 + f2; +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r346536 - [HIP] Remove useless sections in linked files
Author: yaxunl Date: Fri Nov 9 10:52:05 2018 New Revision: 346536 URL: http://llvm.org/viewvc/llvm-project?rev=346536&view=rev Log: [HIP] Remove useless sections in linked files clang-offload-bundler creates __CLANG_OFFLOAD_BUNDLE__* sections in the bundles, which get into the linked files. These sections are useless after linking. They waste disk space and cause confusion for clang when directly linked with other object files, therefore should be removed. Differential Revision: https://reviews.llvm.org/D54275 Modified: cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp Modified: cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp?rev=346536&r1=346535&r2=346536&view=diff == --- cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp Fri Nov 9 10:52:05 2018 @@ -1436,6 +1436,10 @@ void tools::AddHIPLinkerScript(const Too LksStream << "PROVIDE_HIDDEN(__hip_fatbin = .);\n"; LksStream << "" << BundleFileName << "\n"; LksStream << " }\n"; + LksStream << " /DISCARD/ :\n"; + LksStream << " {\n"; + LksStream << "* ( __CLANG_OFFLOAD_BUNDLE__* )\n"; + LksStream << " }\n"; LksStream << "}\n"; LksStream << "INSERT BEFORE .data\n"; LksStream.flush(); ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r346828 - [HIP] Fix device only compilation
Author: yaxunl Date: Tue Nov 13 20:47:31 2018 New Revision: 346828 URL: http://llvm.org/viewvc/llvm-project?rev=346828&view=rev Log: [HIP] Fix device only compilation Fix a bug causing host code to be compiled when --cuda-device-only is set. Differential Revision: https://reviews.llvm.org/D54496 Modified: cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/test/Driver/cuda-phases.cu Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=346828&r1=346827&r2=346828&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Tue Nov 13 20:47:31 2018 @@ -2616,17 +2616,19 @@ class OffloadingActionBuilder final { C.MakeAction(CudaDeviceActions, types::TY_HIP_FATBIN); -DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr, - AssociatedOffloadKind); -// Clear the fat binary, it is already a dependence to an host -// action. -CudaFatBinary = nullptr; +if (!CompileDeviceOnly) { + DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr, + AssociatedOffloadKind); + // Clear the fat binary, it is already a dependence to an host + // action. + CudaFatBinary = nullptr; +} // Remove the CUDA actions as they are already connected to an host // action or fat binary. CudaDeviceActions.clear(); -return ABRT_Success; +return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success; } else if (CurPhase == phases::Link) { // Save CudaDeviceActions to DeviceLinkerInputs for each GPU subarch. // This happens to each device action originated from each input file. @@ -3014,8 +3016,10 @@ public: } // If we can use the bundler, replace the host action by the bundling one in -// the resulting list. Otherwise, just append the device actions. -if (CanUseBundler && !OffloadAL.empty()) { +// the resulting list. Otherwise, just append the device actions. For +// device only compilation, HostAction is a null pointer, therefore only do +// this when HostAction is not a null pointer. 
+if (CanUseBundler && HostAction && !OffloadAL.empty()) { // Add the host action to the list in order to create the bundling action. OffloadAL.push_back(HostAction); Modified: cfe/trunk/test/Driver/cuda-phases.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-phases.cu?rev=346828&r1=346827&r2=346828&view=diff == --- cfe/trunk/test/Driver/cuda-phases.cu (original) +++ cfe/trunk/test/Driver/cuda-phases.cu Tue Nov 13 20:47:31 2018 @@ -157,6 +157,7 @@ // HBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) // HBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]]) // HBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]]) +// HBIN-NOT: device // // Test single gpu architecture up to the assemble phase in host-only // compilation mode. @@ -172,6 +173,7 @@ // HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) // HASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) // HASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) +// HASM-NOT: device // // Test two gpu architectures with complete compilation in host-only @@ -190,6 +192,7 @@ // HBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) // HBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]]) // HBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]]) +// HBIN2-NOT: device // // Test two gpu architectures up to the assemble phase in host-only @@ -206,6 +209,7 @@ // HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) // HASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) // HASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) +// HASM2-NOT: device // // Test single gpu architecture with complete compilation in device-only @@ -224,7 +228,7 @@ // DBIN_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) // DBIN_NV-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]]) // DBIN_NV-DAG: 
[[P5:[0-9]+]]: offload, "device-[[T]] (nvptx64-nvidia-cuda:[[ARCH]])" {[[P4]]}, object - +// DBIN-NOT: host // // Test single gpu architecture up to the assemble phase in device-only // compilation mode. @@ -241,6 +245,7 @@ // DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) // DASM_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) // DASM_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler +// DASM-NO
r354893 - [OpenCL] Fix assertion due to blocks
Author: yaxunl Date: Tue Feb 26 08:20:41 2019 New Revision: 354893 URL: http://llvm.org/viewvc/llvm-project?rev=354893&view=rev Log: [OpenCL] Fix assertion due to blocks A recent change caused assertion in CodeGenFunction::EmitBlockCallExpr when a block is called. There is code Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); getCalleeDecl calls Expr::getReferencedDeclOfCallee, which does not handle BlockExpr and returns nullptr, which causes isa to assert. This patch fixes that. Differential Revision: https://reviews.llvm.org/D58658 Modified: cfe/trunk/lib/AST/Expr.cpp cfe/trunk/test/CodeGenOpenCL/blocks.cl Modified: cfe/trunk/lib/AST/Expr.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/Expr.cpp?rev=354893&r1=354892&r2=354893&view=diff == --- cfe/trunk/lib/AST/Expr.cpp (original) +++ cfe/trunk/lib/AST/Expr.cpp Tue Feb 26 08:20:41 2019 @@ -1358,6 +1358,8 @@ Decl *Expr::getReferencedDeclOfCallee() return DRE->getDecl(); if (MemberExpr *ME = dyn_cast(CEE)) return ME->getMemberDecl(); + if (auto *BE = dyn_cast(CEE)) +return BE->getBlockDecl(); return nullptr; } Modified: cfe/trunk/test/CodeGenOpenCL/blocks.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/blocks.cl?rev=354893&r1=354892&r2=354893&view=diff == --- cfe/trunk/test/CodeGenOpenCL/blocks.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/blocks.cl Tue Feb 26 08:20:41 2019 @@ -90,6 +90,12 @@ int get42() { return blockArgFunc(^{return 42;}); } +// COMMON-LABEL: define {{.*}}@call_block +// call {{.*}}@__call_block_block_invoke +int call_block() { + return ^int(int num) { return num; } (11); +} + // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__size" // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__align" ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r354929 - [CUDA][HIP] Check calling convention based on function target
Author: yaxunl Date: Tue Feb 26 14:24:49 2019 New Revision: 354929 URL: http://llvm.org/viewvc/llvm-project?rev=354929&view=rev Log: [CUDA][HIP] Check calling convention based on function target MSVC header files using vectorcall to differentiate overloaded functions, which causes failure for AMDGPU target. This is because clang does not check function calling convention based on function target. This patch checks calling convention using the proper target info. Differential Revision: https://reviews.llvm.org/D57716 Added: cfe/trunk/test/SemaCUDA/amdgpu-windows-vectorcall.cu Modified: cfe/trunk/lib/Sema/SemaDeclAttr.cpp Modified: cfe/trunk/lib/Sema/SemaDeclAttr.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDeclAttr.cpp?rev=354929&r1=354928&r2=354929&view=diff == --- cfe/trunk/lib/Sema/SemaDeclAttr.cpp (original) +++ cfe/trunk/lib/Sema/SemaDeclAttr.cpp Tue Feb 26 14:24:49 2019 @@ -4615,8 +4615,36 @@ bool Sema::CheckCallingConvAttr(const Pa default: llvm_unreachable("unexpected attribute kind"); } + TargetInfo::CallingConvCheckResult A = TargetInfo::CCCR_OK; const TargetInfo &TI = Context.getTargetInfo(); - TargetInfo::CallingConvCheckResult A = TI.checkCallingConvention(CC); + auto *Aux = Context.getAuxTargetInfo(); + if (LangOpts.CUDA) { +auto CudaTarget = IdentifyCUDATarget(FD); +bool CheckHost = false, CheckDevice = false; +switch (CudaTarget) { +case CFT_HostDevice: + CheckHost = true; + CheckDevice = true; + break; +case CFT_Host: + CheckHost = true; + break; +case CFT_Device: +case CFT_Global: + CheckDevice = true; + break; +case CFT_InvalidTarget: + llvm_unreachable("unexpected cuda target"); +} +auto *HostTI = LangOpts.CUDAIsDevice ? Aux : &TI; +auto *DeviceTI = LangOpts.CUDAIsDevice ? 
&TI : Aux; +if (CheckHost && HostTI) + A = HostTI->checkCallingConvention(CC); +if (A == TargetInfo::CCCR_OK && CheckDevice && DeviceTI) + A = DeviceTI->checkCallingConvention(CC); + } else { +A = TI.checkCallingConvention(CC); + } if (A != TargetInfo::CCCR_OK) { if (A == TargetInfo::CCCR_Warning) Diag(Attrs.getLoc(), diag::warn_cconv_ignored) << Attrs; Added: cfe/trunk/test/SemaCUDA/amdgpu-windows-vectorcall.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/amdgpu-windows-vectorcall.cu?rev=354929&view=auto == --- cfe/trunk/test/SemaCUDA/amdgpu-windows-vectorcall.cu (added) +++ cfe/trunk/test/SemaCUDA/amdgpu-windows-vectorcall.cu Tue Feb 26 14:24:49 2019 @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-pc-windows-msvc -fms-compatibility -fcuda-is-device -fsyntax-only -verify %s + +__cdecl void hostf1(); +__vectorcall void (*hostf2)() = hostf1; // expected-error {{cannot initialize a variable of type 'void ((*))() __attribute__((vectorcall))' with an lvalue of type 'void () __attribute__((cdecl))'}} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r354948 - [HIP] change kernel stub name
Author: yaxunl Date: Tue Feb 26 18:02:52 2019 New Revision: 354948 URL: http://llvm.org/viewvc/llvm-project?rev=354948&view=rev Log: [HIP] change kernel stub name Add .stub to kernel stub function name so that it is different from kernel name in device code. This is necessary to let debugger find correct symbol for kernel. Differential Revision: https://reviews.llvm.org/D58518 Added: cfe/trunk/test/CodeGenCUDA/kernel-stub-name.cu Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp cfe/trunk/lib/CodeGen/CodeGenModule.cpp Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=354948&r1=354947&r2=354948&view=diff == --- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Tue Feb 26 18:02:52 2019 @@ -218,6 +218,7 @@ std::string CGNVCUDARuntime::getDeviceSi void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) { assert(getDeviceSideName(CGF.CurFuncDecl) == CGF.CurFn->getName() || + getDeviceSideName(CGF.CurFuncDecl) + ".stub" == CGF.CurFn->getName() || CGF.CGM.getContext().getTargetInfo().getCXXABI() != CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI()); Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=354948&r1=354947&r2=354948&view=diff == --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original) +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Tue Feb 26 18:02:52 2019 @@ -1048,8 +1048,17 @@ StringRef CodeGenModule::getMangledName( // Keep the first result in the case of a mangling collision. const auto *ND = cast(GD.getDecl()); - auto Result = - Manglings.insert(std::make_pair(getMangledNameImpl(*this, GD, ND), GD)); + std::string MangledName = getMangledNameImpl(*this, GD, ND); + + // Postfix kernel stub names with .stub to differentiate them from kernel + // names in device binaries. 
This is to facilitate the debugger to find + // the correct symbols for kernels in the device binary. + if (auto *FD = dyn_cast(GD.getDecl())) +if (getLangOpts().HIP && !getLangOpts().CUDAIsDevice && +FD->hasAttr()) + MangledName = MangledName + ".stub"; + + auto Result = Manglings.insert(std::make_pair(MangledName, GD)); return MangledDeclNames[CanonicalGD] = Result.first->first(); } Added: cfe/trunk/test/CodeGenCUDA/kernel-stub-name.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/kernel-stub-name.cu?rev=354948&view=auto == --- cfe/trunk/test/CodeGenCUDA/kernel-stub-name.cu (added) +++ cfe/trunk/test/CodeGenCUDA/kernel-stub-name.cu Tue Feb 26 18:02:52 2019 @@ -0,0 +1,20 @@ +// RUN: echo "GPU binary would be here" > %t + +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ +// RUN: -fcuda-include-gpubinary %t -o - -x hip\ +// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CHECK + +#include "Inputs/cuda.h" + +template +__global__ void kernelfunc() {} + +// CHECK-LABEL: define{{.*}}@_Z8hostfuncv() +// CHECK: call void @[[STUB:_Z10kernelfuncIiEvv.stub]]() +void hostfunc(void) { kernelfunc<<<1, 1>>>(); } + +// CHECK: define{{.*}}@[[STUB]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[STUB]] + +// CHECK-LABEL: define{{.*}}@__hip_register_globals +// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[STUB]] ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r354990 - [NFC] minor revision of r354929 [CUDA][HIP] Check calling convention based on function target
Author: yaxunl Date: Wed Feb 27 07:46:29 2019 New Revision: 354990 URL: http://llvm.org/viewvc/llvm-project?rev=354990&view=rev Log: [NFC] minor revision of r354929 [CUDA][HIP] Check calling convention based on function target Add comments and move a variable to if block. Differential Revision: https://reviews.llvm.org/D57716 Modified: cfe/trunk/lib/Sema/SemaDeclAttr.cpp Modified: cfe/trunk/lib/Sema/SemaDeclAttr.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDeclAttr.cpp?rev=354990&r1=354989&r2=354990&view=diff == --- cfe/trunk/lib/Sema/SemaDeclAttr.cpp (original) +++ cfe/trunk/lib/Sema/SemaDeclAttr.cpp Wed Feb 27 07:46:29 2019 @@ -4617,8 +4617,12 @@ bool Sema::CheckCallingConvAttr(const Pa TargetInfo::CallingConvCheckResult A = TargetInfo::CCCR_OK; const TargetInfo &TI = Context.getTargetInfo(); - auto *Aux = Context.getAuxTargetInfo(); + // CUDA functions may have host and/or device attributes which indicate + // their targeted execution environment, therefore the calling convention + // of functions in CUDA should be checked against the target deduced based + // on their host/device attributes. if (LangOpts.CUDA) { +auto *Aux = Context.getAuxTargetInfo(); auto CudaTarget = IdentifyCUDATarget(FD); bool CheckHost = false, CheckDevice = false; switch (CudaTarget) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r355106 - Partial revert of r353952: [HIP] Handle compile -m options and propagate into LLC
Author: yaxunl Date: Thu Feb 28 09:08:26 2019 New Revision: 355106 URL: http://llvm.org/viewvc/llvm-project?rev=355106&view=rev Log: Partial revert of r353952: [HIP] Handle compile -m options and propagate into LLC Remove comments and tests about passing -mcode-object-v3 to driver since it does not work. Other -m options are OK. Also put back -mattr=-code-object-v3 since HIP is still not ready for code object v3. Differential Revision: https://reviews.llvm.org/D57977 Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp cfe/trunk/test/Driver/hip-toolchain-features.hip Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/HIP.cpp?rev=355106&r1=355105&r2=355106&view=diff == --- cfe/trunk/lib/Driver/ToolChains/HIP.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/HIP.cpp Thu Feb 28 09:08:26 2019 @@ -159,7 +159,7 @@ const char *AMDGCN::Linker::constructLlc llvm::StringRef OutputFilePrefix, const char *InputFileName) const { // Construct llc command. 
ArgStringList LlcArgs{InputFileName, "-mtriple=amdgcn-amd-amdhsa", -"-filetype=obj", +"-filetype=obj", "-mattr=-code-object-v3", Args.MakeArgString("-mcpu=" + SubArchName)}; // Extract all the -m options @@ -167,7 +167,7 @@ const char *AMDGCN::Linker::constructLlc handleTargetFeaturesGroup( Args, Features, options::OPT_m_amdgpu_Features_Group); - // Add features to mattr such as code-object-v3 and xnack + // Add features to mattr such as xnack std::string MAttrString = "-mattr="; for(auto OneFeature : Features) { MAttrString.append(Args.MakeArgString(OneFeature)); Modified: cfe/trunk/test/Driver/hip-toolchain-features.hip URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-toolchain-features.hip?rev=355106&r1=355105&r2=355106&view=diff == --- cfe/trunk/test/Driver/hip-toolchain-features.hip (original) +++ cfe/trunk/test/Driver/hip-toolchain-features.hip Thu Feb 28 09:08:26 2019 @@ -4,17 +4,6 @@ // RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \ // RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ -// RUN: -mcode-object-v3 2>&1 | FileCheck %s -check-prefix=COV3 -// RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \ -// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ -// RUN: -mno-code-object-v3 2>&1 | FileCheck %s -check-prefix=NOCOV3 - -// COV3: {{.*}}clang{{.*}}"-target-feature" "+code-object-v3" -// NOCOV3: {{.*}}clang{{.*}}"-target-feature" "-code-object-v3" - - -// RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \ -// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ // RUN: -mxnack 2>&1 | FileCheck %s -check-prefix=XNACK // RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \ // RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ @@ -37,12 +26,12 @@ // RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \ // RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ -// RUN: -mcode-object-v3 -mxnack -msram-ecc \ +// RUN: -mxnack -msram-ecc \ // RUN: 2>&1 | 
FileCheck %s -check-prefix=ALL3 // RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \ // RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ -// RUN: -mno-code-object-v3 -mno-xnack -mno-sram-ecc \ +// RUN: -mno-xnack -mno-sram-ecc \ // RUN: 2>&1 | FileCheck %s -check-prefix=NOALL3 -// ALL3: {{.*}}clang{{.*}}"-target-feature" "+code-object-v3" "-target-feature" "+xnack" "-target-feature" "+sram-ecc" -// NOALL3: {{.*}}clang{{.*}}"-target-feature" "-code-object-v3" "-target-feature" "-xnack" "-target-feature" "-sram-ecc" +// ALL3: {{.*}}clang{{.*}}"-target-feature" "+xnack" "-target-feature" "+sram-ecc" +// NOALL3: {{.*}}clang{{.*}}"-target-feature" "-xnack" "-target-feature" "-sram-ecc" ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r355410 - [HIP] Do not unbundle object files for -fno-gpu-rdc
Author: yaxunl Date: Tue Mar 5 08:07:56 2019 New Revision: 355410 URL: http://llvm.org/viewvc/llvm-project?rev=355410&view=rev Log: [HIP] Do not unbundle object files for -fno-gpu-rdc When -fno-gpu-rdc is set, device code is compiled, linked, and assembled into fat binary and embedded as string in object files. The object files are normal object files which can be linked by host linker. In the linking stage, the object files should not be unbundled when -fno-gpu-rdc is set since they are normal object files, not bundles. The object files only need to be unbundled when -fgpu-rdc is set. Currently clang always unbundles object files, disregarding -fgpu-rdc option. This patch fixes that. Differential Revision: https://reviews.llvm.org/D58917 Modified: cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/test/Driver/hip-binding.hip cfe/trunk/test/Driver/hip-link-shared-library.hip Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=355410&r1=355409&r2=355410&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Tue Mar 5 08:07:56 2019 @@ -2293,6 +2293,9 @@ class OffloadingActionBuilder final { /// Flag that is set to true if this builder acted on the current input. bool IsActive = false; + +/// Flag for -fgpu-rdc. +bool Relocatable = false; public: CudaActionBuilderBase(Compilation &C, DerivedArgList &Args, const Driver::InputList &Inputs, @@ -2338,6 +2341,12 @@ class OffloadingActionBuilder final { // If this is an unbundling action use it as is for each CUDA toolchain. if (auto *UA = dyn_cast(HostAction)) { + +// If -fgpu-rdc is disabled, should not unbundle since there is no +// device code to link. 
+if (!Relocatable) + return ABRT_Inactive; + CudaDeviceActions.clear(); auto *IA = cast(UA->getInputs().back()); std::string FileName = IA->getInputArg().getAsString(Args); @@ -2409,6 +2418,9 @@ class OffloadingActionBuilder final { !C.hasOffloadToolChain()) return false; + Relocatable = Args.hasFlag(options::OPT_fgpu_rdc, + options::OPT_fno_gpu_rdc, /*Default=*/false); + const ToolChain *HostTC = C.getSingleOffloadToolChain(); assert(HostTC && "No toolchain for host compilation."); if (HostTC->getTriple().isNVPTX() || @@ -2594,13 +2606,11 @@ class OffloadingActionBuilder final { class HIPActionBuilder final : public CudaActionBuilderBase { /// The linker inputs obtained for each device arch. SmallVector DeviceLinkerInputs; -bool Relocatable; public: HIPActionBuilder(Compilation &C, DerivedArgList &Args, const Driver::InputList &Inputs) -: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP), - Relocatable(false) {} +: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {} bool canUseBundlerUnbundler() const override { return true; } @@ -2705,13 +2715,6 @@ class OffloadingActionBuilder final { ++I; } } - -bool initialize() override { - Relocatable = Args.hasFlag(options::OPT_fgpu_rdc, - options::OPT_fno_gpu_rdc, /*Default=*/false); - - return CudaActionBuilderBase::initialize(); -} }; /// OpenMP action builder. 
The host bitcode is passed to the device frontend Modified: cfe/trunk/test/Driver/hip-binding.hip URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-binding.hip?rev=355410&r1=355409&r2=355410&view=diff == --- cfe/trunk/test/Driver/hip-binding.hip (original) +++ cfe/trunk/test/Driver/hip-binding.hip Tue Mar 5 08:07:56 2019 @@ -4,7 +4,7 @@ // RUN: touch %t.o // RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \ -// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o\ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %t.o\ // RUN: 2>&1 | FileCheck %s // CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[OBJ1:.*o]]", "[[OBJ2:.*o]]", "[[OBJ3:.*o]]"] @@ -13,3 +13,10 @@ // CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ3]]"], output: "[[IMG3:.*out]]" // CHECK-NOT: offload bundler // CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[OBJ1]]", "[[IMG2]]", "[[IMG3]]"], output: "a.out" + +// RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o\ +// RUN: 2>&1 | FileCheck -check-prefix=NORDC %s + +// NORDC-NOT: offload bundler +// NORDC: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["{{.*o}}"], output: "a.out" Modified: cfe/trunk/test/Driver/hip-link-shared-library.hip URL: http://llvm.org
r355419 - Allow bundle size to be 0 in clang-offload-bundler
Author: yaxunl Date: Tue Mar 5 09:52:32 2019 New Revision: 355419 URL: http://llvm.org/viewvc/llvm-project?rev=355419&view=rev Log: Allow bundle size to be 0 in clang-offload-bundler HIP uses clang-offload-bundler to create fat binary. The bundle for host is empty. Currently clang-offload-bundler checks if the bundle size is 0 when unbundling. If so it will exit without unbundling the remaining bundles. This makes clang-offload-bundler unable to unbundle fat binaries generated for HIP. This patch allows the bundle size to be 0 when clang-offload-bundler unbundles input files. Differential Revision: https://reviews.llvm.org/D58057 Modified: cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp Modified: cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp?rev=355419&r1=355418&r2=355419&view=diff == --- cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp (original) +++ cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp Tue Mar 5 09:52:32 2019 @@ -292,7 +292,7 @@ public: ReadChars += TripleSize; // Check if the offset and size make sense. - if (!Size || !Offset || Offset + Size > FC.size()) + if (!Offset || Offset + Size > FC.size()) return; assert(BundlesInfo.find(Triple) == BundlesInfo.end() && ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r355421 - [CUDA][HIP][Sema] Fix template kernel with function as template parameter
Author: yaxunl Date: Tue Mar 5 10:19:35 2019 New Revision: 355421 URL: http://llvm.org/viewvc/llvm-project?rev=355421&view=rev Log: [CUDA][HIP][Sema] Fix template kernel with function as template parameter If a kernel template has a function as its template parameter, a device function should be allowed as template argument since a kernel can call a device function. However, currently if the kernel template is instantiated in a host function, clang will emit an error message saying the device function is an invalid candidate for the template parameter. This happens because clang checks the reference to the device function during parsing the template arguments. At this point, the template is not instantiated yet. Clang incorrectly assumes the device function is called by the host function and emits the error message. This patch fixes the issue by disabling checking of device function during parsing template arguments and deferring the check to the instantiation of the template. At that point, the template decl is already available, therefore the check can be done against the instantiated function template decl. 
Differential Revision: https://reviews.llvm.org/D56411 Modified: cfe/trunk/lib/Sema/SemaCUDA.cpp cfe/trunk/lib/Sema/SemaExpr.cpp cfe/trunk/test/SemaCUDA/call-device-fn-from-host.cu cfe/trunk/test/SemaCUDA/call-host-fn-from-device.cu Modified: cfe/trunk/lib/Sema/SemaCUDA.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaCUDA.cpp?rev=355421&r1=355420&r2=355421&view=diff == --- cfe/trunk/lib/Sema/SemaCUDA.cpp (original) +++ cfe/trunk/lib/Sema/SemaCUDA.cpp Tue Mar 5 10:19:35 2019 @@ -675,6 +675,11 @@ Sema::DeviceDiagBuilder Sema::CUDADiagIf bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); assert(Callee && "Callee may not be null."); + + auto &ExprEvalCtx = ExprEvalContexts.back(); + if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated()) +return true; + // FIXME: Is bailing out early correct here? Should we instead assume that // the caller is a global initializer? FunctionDecl *Caller = dyn_cast(CurContext); Modified: cfe/trunk/lib/Sema/SemaExpr.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=355421&r1=355420&r2=355421&view=diff == --- cfe/trunk/lib/Sema/SemaExpr.cpp (original) +++ cfe/trunk/lib/Sema/SemaExpr.cpp Tue Mar 5 10:19:35 2019 @@ -14799,6 +14799,9 @@ void Sema::MarkFunctionReferenced(Source if (FPT && isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) ResolveExceptionSpec(Loc, FPT); + if (getLangOpts().CUDA) +CheckCUDACall(Loc, Func); + // If we don't need to mark the function as used, and we don't need to // try to provide a definition, there's nothing more to do. 
if ((Func->isUsed(/*CheckUsedAttr=*/false) || !OdrUse) && Modified: cfe/trunk/test/SemaCUDA/call-device-fn-from-host.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/call-device-fn-from-host.cu?rev=355421&r1=355420&r2=355421&view=diff == --- cfe/trunk/test/SemaCUDA/call-device-fn-from-host.cu (original) +++ cfe/trunk/test/SemaCUDA/call-device-fn-from-host.cu Tue Mar 5 10:19:35 2019 @@ -37,7 +37,7 @@ __host__ __device__ void T::hd3() { } template __host__ __device__ void hd2() { device_fn(); } -// expected-error@-1 {{reference to __device__ function 'device_fn' in __host__ __device__ function}} +// expected-error@-1 2 {{reference to __device__ function 'device_fn' in __host__ __device__ function}} void host_fn() { hd2(); } __host__ __device__ void hd() { device_fn(); } @@ -90,3 +90,8 @@ __host__ __device__ void fn_ptr_template static __host__ __device__ void hd_func() { device_fn(); } __global__ void kernel() { hd_func(); } void host_func(void) { kernel<<<1, 1>>>(); } + +// Should allow host function call kernel template with device function argument. +__device__ void f(); +template __global__ void t() { F(); } +__host__ void g() { t<<<1,1>>>(); } Modified: cfe/trunk/test/SemaCUDA/call-host-fn-from-device.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/call-host-fn-from-device.cu?rev=355421&r1=355420&r2=355421&view=diff == --- cfe/trunk/test/SemaCUDA/call-host-fn-from-device.cu (original) +++ cfe/trunk/test/SemaCUDA/call-host-fn-from-device.cu Tue Mar 5 10:19:35 2019 @@ -56,14 +56,14 @@ __host__ __device__ void T::hd3() { } template __host__ __device__ void hd2() { host_fn(); } -// expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}} +// expected-error@-1 2 {{reference to __host__ f
r358290 - [HIP] Use -mlink-builtin-bitcode to link device library
Author: yaxunl Date: Fri Apr 12 09:23:31 2019 New Revision: 358290 URL: http://llvm.org/viewvc/llvm-project?rev=358290&view=rev Log: [HIP] Use -mlink-builtin-bitcode to link device library Use -mlink-builtin-bitcode instead of llvm-link to link device library so that device library bitcode and user device code can be compiled in a consistent way. This is the same approach used by CUDA and OpenMP. Differential Revision: https://reviews.llvm.org/D60513 Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp cfe/trunk/test/Driver/hip-device-libs.hip cfe/trunk/test/Driver/hip-toolchain-no-rdc.hip cfe/trunk/test/Driver/hip-toolchain-rdc.hip Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/HIP.cpp?rev=358290&r1=358289&r2=358290&view=diff == --- cfe/trunk/lib/Driver/ToolChains/HIP.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/HIP.cpp Fri Apr 12 09:23:31 2019 @@ -31,7 +31,7 @@ using namespace llvm::opt; namespace { -static void addBCLib(Compilation &C, const ArgList &Args, +static void addBCLib(const Driver &D, const ArgList &Args, ArgStringList &CmdArgs, ArgStringList LibraryPaths, StringRef BCName) { StringRef FullName; @@ -40,11 +40,12 @@ static void addBCLib(Compilation &C, con llvm::sys::path::append(Path, BCName); FullName = Path; if (llvm::sys::fs::exists(FullName)) { + CmdArgs.push_back("-mlink-builtin-bitcode"); CmdArgs.push_back(Args.MakeArgString(FullName)); return; } } - C.getDriver().Diag(diag::err_drv_no_such_file) << BCName; + D.Diag(diag::err_drv_no_such_file) << BCName; } } // namespace @@ -58,44 +59,6 @@ const char *AMDGCN::Linker::constructLLV for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); - ArgStringList LibraryPaths; - - // Find in --hip-device-lib-path and HIP_LIBRARY_PATH. 
- for (auto Path : Args.getAllArgValues(options::OPT_hip_device_lib_path_EQ)) -LibraryPaths.push_back(Args.MakeArgString(Path)); - - addDirectoryList(Args, LibraryPaths, "-L", "HIP_DEVICE_LIB_PATH"); - - llvm::SmallVector BCLibs; - - // Add bitcode library in --hip-device-lib. - for (auto Lib : Args.getAllArgValues(options::OPT_hip_device_lib_EQ)) { -BCLibs.push_back(Args.MakeArgString(Lib)); - } - - // If --hip-device-lib is not set, add the default bitcode libraries. - if (BCLibs.empty()) { -// Get the bc lib file name for ISA version. For example, -// gfx803 => oclc_isa_version_803.amdgcn.bc. -std::string ISAVerBC = -"oclc_isa_version_" + SubArchName.drop_front(3).str() + ".amdgcn.bc"; - -llvm::StringRef FlushDenormalControlBC; -if (Args.hasArg(options::OPT_fcuda_flush_denormals_to_zero)) - FlushDenormalControlBC = "oclc_daz_opt_on.amdgcn.bc"; -else - FlushDenormalControlBC = "oclc_daz_opt_off.amdgcn.bc"; - -BCLibs.append({"hip.amdgcn.bc", "opencl.amdgcn.bc", - "ocml.amdgcn.bc", "ockl.amdgcn.bc", - "oclc_finite_only_off.amdgcn.bc", - FlushDenormalControlBC, - "oclc_correctly_rounded_sqrt_on.amdgcn.bc", - "oclc_unsafe_math_off.amdgcn.bc", ISAVerBC}); - } - for (auto Lib : BCLibs) -addBCLib(C, Args, CmdArgs, LibraryPaths, Lib); - // Add an intermediate output file. CmdArgs.push_back("-o"); std::string TmpName = @@ -324,6 +287,44 @@ void HIPToolChain::addClangTargetOptions CC1Args.append({"-fvisibility", "hidden"}); CC1Args.push_back("-fapply-global-visibility-to-externs"); } + ArgStringList LibraryPaths; + + // Find in --hip-device-lib-path and HIP_LIBRARY_PATH. + for (auto Path : + DriverArgs.getAllArgValues(options::OPT_hip_device_lib_path_EQ)) +LibraryPaths.push_back(DriverArgs.MakeArgString(Path)); + + addDirectoryList(DriverArgs, LibraryPaths, "-L", "HIP_DEVICE_LIB_PATH"); + + llvm::SmallVector BCLibs; + + // Add bitcode library in --hip-device-lib. 
+ for (auto Lib : DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ)) { +BCLibs.push_back(DriverArgs.MakeArgString(Lib)); + } + + // If --hip-device-lib is not set, add the default bitcode libraries. + if (BCLibs.empty()) { +// Get the bc lib file name for ISA version. For example, +// gfx803 => oclc_isa_version_803.amdgcn.bc. +std::string ISAVerBC = +"oclc_isa_version_" + GpuArch.drop_front(3).str() + ".amdgcn.bc"; + +llvm::StringRef FlushDenormalControlBC; +if (DriverArgs.hasArg(options::OPT_fcuda_flush_denormals_to_zero)) + FlushDenormalControlBC = "oclc_daz_opt_on.amdgcn.bc"; +else + FlushDenormalControlBC = "oclc_daz_opt_off.amdgcn.bc"; + +BCLibs.append({"hip.amdgcn.bc", "opencl.amdgcn.bc", "ocml.amdgcn.bc", + "ockl.amdgcn.bc", "oclc_finite_only_off.amdgcn.bc", + FlushDenormalCont
r359594 - AMDGPU: Enable _Float16
Author: yaxunl Date: Tue Apr 30 11:35:37 2019 New Revision: 359594 URL: http://llvm.org/viewvc/llvm-project?rev=359594&view=rev Log: AMDGPU: Enable _Float16 Added: cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=359594&r1=359593&r2=359594&view=diff == --- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original) +++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Tue Apr 30 11:35:37 2019 @@ -252,6 +252,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const !isAMDGCN(Triple)); UseAddrSpaceMapMangling = true; + HasLegalHalfType = true; + HasFloat16 = true; + // Set pointer width and alignment for target address space 0. PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); if (getMaxPointerWidth() == 64) { Added: cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp?rev=359594&view=auto == --- cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp (added) +++ cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp Tue Apr 30 11:35:37 2019 @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx701 -S -o - %s | FileCheck %s -check-prefix=NOF16 +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx803 -S -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -S -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -S -o - %s | FileCheck %s +void f() { + _Float16 x, y, z; + // CHECK: v_add_f16_e64 + // NOF16: v_add_f32_e64 + z = x + y; + // CHECK: v_sub_f16_e64 + // NOF16: v_sub_f32_e64 + z = x - y; + // CHECK: v_mul_f16_e64 + // NOF16: v_mul_f32_e64 + z = x * y; + // CHECK: v_div_fixup_f16 + // NOF16: v_div_fixup_f32 + z = x / y; +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r359598 - Add requires amdgpu-registered-target for amdgpu-float16.cpp
Author: yaxunl Date: Tue Apr 30 12:06:15 2019 New Revision: 359598 URL: http://llvm.org/viewvc/llvm-project?rev=359598&view=rev Log: Add requires amdgpu-registered-target for amdgpu-float16.cpp Modified: cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp Modified: cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp?rev=359598&r1=359597&r2=359598&view=diff == --- cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp (original) +++ cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp Tue Apr 30 12:06:15 2019 @@ -1,3 +1,4 @@ +// REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx701 -S -o - %s | FileCheck %s -check-prefix=NOF16 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx803 -S -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -S -o - %s | FileCheck %s ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r343611 - [HIP] Support early finalization of device code for -fno-gpu-rdc
Author: yaxunl Date: Tue Oct 2 10:48:54 2018 New Revision: 343611 URL: http://llvm.org/viewvc/llvm-project?rev=343611&view=rev Log: [HIP] Support early finalization of device code for -fno-gpu-rdc This patch renames -f{no-}cuda-rdc to -f{no-}gpu-rdc and keeps the original options as aliases. When -fgpu-rdc is off, clang will assume the device code in each translation unit does not call external functions except those in the device library, therefore it is possible to compile the device code in each translation unit to self-contained kernels and embed them in the host object, so that the host object behaves like a usual host object which can be linked by lld. The benefits of this feature are: 1. allow users to create static libraries which can be linked by host linker; 2. amortized device code linking time. This patch modifies HIP action builder to insert actions for linking device code and generating HIP fatbin, and passes HIP fatbin to host backend action. It extracts code for constructing command for generating HIP fatbin as a function so that it can be reused by early finalization. It also modifies codegen of HIP host constructor functions to embed the device fatbin when it is available. 
Differential Revision: https://reviews.llvm.org/D52377 Added: cfe/trunk/test/Driver/hip-toolchain-no-rdc.hip cfe/trunk/test/Driver/hip-toolchain-rdc.hip Removed: cfe/trunk/test/Driver/hip-toolchain.hip Modified: cfe/trunk/include/clang/Basic/LangOptions.def cfe/trunk/include/clang/Driver/Options.td cfe/trunk/include/clang/Driver/Types.def cfe/trunk/lib/AST/Decl.cpp cfe/trunk/lib/CodeGen/CGCUDANV.cpp cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/lib/Driver/ToolChains/Clang.cpp cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp cfe/trunk/lib/Driver/ToolChains/Cuda.cpp cfe/trunk/lib/Driver/ToolChains/HIP.cpp cfe/trunk/lib/Driver/ToolChains/HIP.h cfe/trunk/lib/Frontend/CompilerInvocation.cpp cfe/trunk/lib/Sema/SemaDeclAttr.cpp cfe/trunk/test/CodeGenCUDA/device-stub.cu cfe/trunk/test/Driver/cuda-external-tools.cu cfe/trunk/test/Driver/cuda-phases.cu cfe/trunk/test/Driver/hip-output-file-name.hip cfe/trunk/test/SemaCUDA/extern-shared.cu Modified: cfe/trunk/include/clang/Basic/LangOptions.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/LangOptions.def?rev=343611&r1=343610&r2=343611&view=diff == --- cfe/trunk/include/clang/Basic/LangOptions.def (original) +++ cfe/trunk/include/clang/Basic/LangOptions.def Tue Oct 2 10:48:54 2018 @@ -211,7 +211,7 @@ LANGOPT(CUDAIsDevice , 1, 0, "compi LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "allowing variadic functions in CUDA device code") LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr functions as __host__ __device__") LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions") -LANGOPT(CUDARelocatableDeviceCode, 1, 0, "generate relocatable device code") +LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code") LANGOPT(SizedDeallocation , 1, 0, "sized deallocation") LANGOPT(AlignedAllocation , 1, 0, "aligned allocation") Modified: cfe/trunk/include/clang/Driver/Options.td URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=343611&r1=343610&r2=343611&view=diff == --- cfe/trunk/include/clang/Driver/Options.td (original) +++ cfe/trunk/include/clang/Driver/Options.td Tue Oct 2 10:48:54 2018 @@ -584,9 +584,11 @@ def fno_cuda_flush_denormals_to_zero : F def fcuda_approx_transcendentals : Flag<["-"], "fcuda-approx-transcendentals">, Flags<[CC1Option]>, HelpText<"Use approximate transcendental functions">; def fno_cuda_approx_transcendentals : Flag<["-"], "fno-cuda-approx-transcendentals">; -def fcuda_rdc : Flag<["-"], "fcuda-rdc">, Flags<[CC1Option]>, +def fgpu_rdc : Flag<["-"], "fgpu-rdc">, Flags<[CC1Option]>, HelpText<"Generate relocatable device code, also known as separate compilation mode.">; -def fno_cuda_rdc : Flag<["-"], "fno-cuda-rdc">; +def fno_gpu_rdc : Flag<["-"], "fno-gpu-rdc">; +def : Flag<["-"], "fcuda-rdc">, Alias; +def : Flag<["-"], "fno-cuda-rdc">, Alias; def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>, HelpText<"Use 32-bit pointers for accessing const/local/shared address spaces.">; def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">; Modified: cfe/trunk/include/clang/Driver/Types.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Types.def?rev=343611&r1=343610&r2=343611&view=diff == --- cfe/trunk/include/clang/Driver/Types.def (original) +++ cfe/trunk/include/clang/Driver/Types.def Tue Oct 2 10:48:54 2018 @@ -101,4 +101,5 @@ TYPE("image",
r352801 - Do not copy long double and 128-bit fp format from aux target for AMDGPU
Author: yaxunl Date: Thu Jan 31 13:57:51 2019 New Revision: 352801 URL: http://llvm.org/viewvc/llvm-project?rev=352801&view=rev Log: Do not copy long double and 128-bit fp format from aux target for AMDGPU rC352620 caused regressions because it copied floating point format from aux target. Floating point format decides whether extended long double is supported. It is x86_fp80 on x86 but IEEE double on amdgcn. Document usage of long double type in HIP programming guide https://github.com/ROCm-Developer-Tools/HIP/pull/890 Differential Revision: https://reviews.llvm.org/D57527 Added: cfe/trunk/test/CodeGenCUDA/types.cu Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=352801&r1=352800&r2=352801&view=diff == --- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original) +++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Thu Jan 31 13:57:51 2019 @@ -307,5 +307,16 @@ void AMDGPUTargetInfo::getTargetDefines( } void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { + assert(HalfFormat == Aux->HalfFormat); + assert(FloatFormat == Aux->FloatFormat); + assert(DoubleFormat == Aux->DoubleFormat); + + // On x86_64 long double is 80-bit extended precision format, which is + // not supported by AMDGPU. 128-bit floating point format is also not + // supported by AMDGPU. Therefore keep its own format for these two types. 
+ auto SaveLongDoubleFormat = LongDoubleFormat; + auto SaveFloat128Format = Float128Format; copyAuxTarget(Aux); + LongDoubleFormat = SaveLongDoubleFormat; + Float128Format = SaveFloat128Format; } Added: cfe/trunk/test/CodeGenCUDA/types.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/types.cu?rev=352801&view=auto == --- cfe/trunk/test/CodeGenCUDA/types.cu (added) +++ cfe/trunk/test/CodeGenCUDA/types.cu Thu Jan 31 13:57:51 2019 @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple amdgcn -aux-triple x86_64 -fcuda-is-device -emit-llvm %s -o - | FileCheck -check-prefix=DEV %s +// RUN: %clang_cc1 -triple x86_64 -aux-triple amdgcn -emit-llvm %s -o - | FileCheck -check-prefix=HOST %s + +#include "Inputs/cuda.h" + +// HOST: @ld_host = global x86_fp80 0xK +long double ld_host; + +// DEV: @ld_device = addrspace(1) externally_initialized global double 0.00e+00 +__device__ long double ld_device; ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r354004 - [CUDA][HIP] Use device side kernel and variable names when registering them
Author: yaxunl Date: Wed Feb 13 18:00:09 2019 New Revision: 354004 URL: http://llvm.org/viewvc/llvm-project?rev=354004&view=rev Log: [CUDA][HIP] Use device side kernel and variable names when registering them __hipRegisterFunction and __hipRegisterVar need to accept device side kernel and variable names so that HIP runtime can associate kernel stub functions in host code with kernel symbols in fat binaries, and associate shadow variables in host code with device variables in fat binaries. Currently, clang assumes kernel functions and device variables have the same name as the kernel stub functions and shadow variables. However, when host is compiled in windows with MSVC C++ ABI and device is compiled with Itanium C++ ABI (e.g. AMDGPU), kernels and device symbols in fat binary are mangled differently than host. This patch gets the device side kernel and variable name by mangling them in the mangle context of aux target. Differential Revision: https://reviews.llvm.org/D58163 Modified: cfe/trunk/include/clang/AST/ASTContext.h cfe/trunk/lib/AST/ASTContext.cpp cfe/trunk/lib/CodeGen/CGCUDANV.cpp cfe/trunk/lib/CodeGen/CGCUDARuntime.h cfe/trunk/lib/CodeGen/CodeGenModule.cpp cfe/trunk/test/CodeGenCUDA/device-stub.cu Modified: cfe/trunk/include/clang/AST/ASTContext.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/ASTContext.h?rev=354004&r1=354003&r2=354004&view=diff == --- cfe/trunk/include/clang/AST/ASTContext.h (original) +++ cfe/trunk/include/clang/AST/ASTContext.h Wed Feb 13 18:00:09 2019 @@ -2237,7 +2237,8 @@ public: VTableContextBase *getVTableContext(); - MangleContext *createMangleContext(); + /// If \p T is null pointer, assume the target in ASTContext. 
+ MangleContext *createMangleContext(const TargetInfo *T = nullptr); void DeepCollectObjCIvars(const ObjCInterfaceDecl *OI, bool leafClass, SmallVectorImpl &Ivars) const; Modified: cfe/trunk/lib/AST/ASTContext.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ASTContext.cpp?rev=354004&r1=354003&r2=354004&view=diff == --- cfe/trunk/lib/AST/ASTContext.cpp (original) +++ cfe/trunk/lib/AST/ASTContext.cpp Wed Feb 13 18:00:09 2019 @@ -9981,8 +9981,10 @@ VTableContextBase *ASTContext::getVTable return VTContext.get(); } -MangleContext *ASTContext::createMangleContext() { - switch (Target->getCXXABI().getKind()) { +MangleContext *ASTContext::createMangleContext(const TargetInfo *T) { + if (!T) +T = Target; + switch (T->getCXXABI().getKind()) { case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericItanium: case TargetCXXABI::GenericARM: Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=354004&r1=354003&r2=354004&view=diff == --- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Wed Feb 13 18:00:09 2019 @@ -42,14 +42,25 @@ private: /// Convenience reference to the current module llvm::Module &TheModule; /// Keeps track of kernel launch stubs emitted in this module - llvm::SmallVector EmittedKernels; - llvm::SmallVector, 16> DeviceVars; + struct KernelInfo { +llvm::Function *Kernel; +const Decl *D; + }; + llvm::SmallVector EmittedKernels; + struct VarInfo { +llvm::GlobalVariable *Var; +const VarDecl *D; +unsigned Flag; + }; + llvm::SmallVector DeviceVars; /// Keeps track of variable containing handle of GPU binary. Populated by /// ModuleCtorFunction() and used to create corresponding cleanup calls in /// ModuleDtorFunction() llvm::GlobalVariable *GpuBinaryHandle = nullptr; /// Whether we generate relocatable device code. bool RelocatableDeviceCode; + /// Mangle context for device. 
+ std::unique_ptr DeviceMC; llvm::FunctionCallee getSetupArgumentFn() const; llvm::FunctionCallee getLaunchFn() const; @@ -106,13 +117,15 @@ private: void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args); void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args); + std::string getDeviceSideName(const Decl *ND); public: CGNVCUDARuntime(CodeGenModule &CGM); void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override; - void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) override { -DeviceVars.push_back(std::make_pair(&Var, Flags)); + void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var, + unsigned Flags) override { +DeviceVars.push_back({&Var, VD, Flags}); } /// Creates module constructor function @@ -138,7 +151,9 @@ CGNVCUDARuntime::addUnderscoredPrefixToN CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
r354615 - [HIP] change kernel stub name
Author: yaxunl Date: Thu Feb 21 12:12:16 2019 New Revision: 354615 URL: http://llvm.org/viewvc/llvm-project?rev=354615&view=rev Log: [HIP] change kernel stub name Add .stub to kernel stub function name so that it is different from kernel name in device code. This is necessary to let debugger find correct symbol for kernel Differential Revision: https://reviews.llvm.org/D58518 Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp cfe/trunk/test/CodeGenCUDA/device-stub.cu Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=354615&r1=354614&r2=354615&view=diff == --- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Thu Feb 21 12:12:16 2019 @@ -227,6 +227,12 @@ void CGNVCUDARuntime::emitDeviceStub(Cod emitDeviceStubBodyNew(CGF, Args); else emitDeviceStubBodyLegacy(CGF, Args); + + // Postfix kernel stub names with .stub to differentiate them from kernel + // names in device binaries. This is to facilitate the debugger to find + // the correct symbols for kernels in the device binary. + if (CGF.getLangOpts().HIP) +CGF.CurFn->setName(CGF.CurFn->getName() + ".stub"); } // CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local Modified: cfe/trunk/test/CodeGenCUDA/device-stub.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-stub.cu?rev=354615&r1=354614&r2=354615&view=diff == --- cfe/trunk/test/CodeGenCUDA/device-stub.cu (original) +++ cfe/trunk/test/CodeGenCUDA/device-stub.cu Thu Feb 21 12:12:16 2019 @@ -145,7 +145,8 @@ void use_pointers() { // Test that we build the correct number of calls to cudaSetupArgument followed // by a call to cudaLaunch. 
-// LNX: define{{.*}}kernelfunc +// CUDA-LABEL: define{{.*}}kernelfunc +// HIP-LABEL: define{{.*}}@_Z10kernelfunciii.stub // New launch sequence stores arguments into local buffer and passes array of // pointers to them directly to cudaLaunchKernel ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r354651 - revert r354615: [HIP] change kernel stub name
Author: yaxunl Date: Thu Feb 21 20:20:12 2019 New Revision: 354651 URL: http://llvm.org/viewvc/llvm-project?rev=354651&view=rev Log: revert r354615: [HIP] change kernel stub name It caused regressions. Differential Revision: https://reviews.llvm.org/D58518 Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp cfe/trunk/test/CodeGenCUDA/device-stub.cu Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=354651&r1=354650&r2=354651&view=diff == --- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Thu Feb 21 20:20:12 2019 @@ -227,12 +227,6 @@ void CGNVCUDARuntime::emitDeviceStub(Cod emitDeviceStubBodyNew(CGF, Args); else emitDeviceStubBodyLegacy(CGF, Args); - - // Postfix kernel stub names with .stub to differentiate them from kernel - // names in device binaries. This is to facilitate the debugger to find - // the correct symbols for kernels in the device binary. - if (CGF.getLangOpts().HIP) -CGF.CurFn->setName(CGF.CurFn->getName() + ".stub"); } // CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local Modified: cfe/trunk/test/CodeGenCUDA/device-stub.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-stub.cu?rev=354651&r1=354650&r2=354651&view=diff == --- cfe/trunk/test/CodeGenCUDA/device-stub.cu (original) +++ cfe/trunk/test/CodeGenCUDA/device-stub.cu Thu Feb 21 20:20:12 2019 @@ -145,8 +145,7 @@ void use_pointers() { // Test that we build the correct number of calls to cudaSetupArgument followed // by a call to cudaLaunch. -// CUDA-LABEL: define{{.*}}kernelfunc -// HIP-LABEL: define{{.*}}@_Z10kernelfunciii.stub +// LNX: define{{.*}}kernelfunc // New launch sequence stores arguments into local buffer and passes array of // pointers to them directly to cudaLaunchKernel ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r312441 - [OpenCL] Do not use vararg in emitted functions for enqueue_kernel
Author: yaxunl Date: Sun Sep 3 06:52:24 2017 New Revision: 312441 URL: http://llvm.org/viewvc/llvm-project?rev=312441&view=rev Log: [OpenCL] Do not use vararg in emitted functions for enqueue_kernel Not all targets support vararg (e.g. amdgpu). Instead of using vararg in the emitted functions for enqueue_kernel, this patch creates a temporary array of size_t, stores the size arguments in the temporary array and passes it to the emitted functions for enqueue_kernel. Differential Revision: https://reviews.llvm.org/D36678 Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=312441&r1=312440&r2=312441&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sun Sep 3 06:52:24 2017 @@ -2601,27 +2601,50 @@ RValue CodeGenFunction::EmitBuiltinExpr( } assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); +// Create a temporary array to hold the sizes of local pointer arguments +// for the block. \p First is the position of the first size argument. +auto CreateArrayForSizeVar = [=](unsigned First) { + auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First); + auto *Arr = Builder.CreateAlloca(AT); + llvm::Value *Ptr; + // Each of the following arguments specifies the size of the corresponding + // argument passed to the enqueued block. + auto *Zero = llvm::ConstantInt::get(IntTy, 0); + for (unsigned I = First; I < NumArgs; ++I) { +auto *Index = llvm::ConstantInt::get(IntTy, I - First); +auto *GEP = Builder.CreateGEP(Arr, {Zero, Index}); +if (I == First) + Ptr = GEP; +auto *V = +Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy); +Builder.CreateAlignedStore( +V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy)); + } + return Ptr; +}; + // Could have events and/or vaargs. 
if (E->getArg(3)->getType()->isBlockPointerType()) { // No events passed, but has variadic arguments. Name = "__enqueue_kernel_vaargs"; - llvm::Value *Block = Builder.CreatePointerCast( - EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); + auto *Block = Builder.CreatePointerCast(EmitScalarExpr(E->getArg(3)), + GenericVoidPtrTy); + auto *PtrToSizeArray = CreateArrayForSizeVar(4); + // Create a vector of the arguments, as well as a constant value to // express to the runtime the number of variadic arguments. - std::vector Args = {Queue, Flags, Range, Block, - ConstantInt::get(IntTy, NumArgs - 4)}; - std::vector ArgTys = {QueueTy, IntTy, RangeTy, - GenericVoidPtrTy, IntTy}; - - // Each of the following arguments specifies the size of the corresponding - // argument passed to the enqueued block. - for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I) -Args.push_back( -Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy)); + std::vector Args = {Queue, + Flags, + Range, + Block, + ConstantInt::get(IntTy, NumArgs - 4), + PtrToSizeArray}; + std::vector ArgTys = {QueueTy, IntTy, + RangeTy, GenericVoidPtrTy, + IntTy, PtrToSizeArray->getType()}; llvm::FunctionType *FTy = llvm::FunctionType::get( - Int32Ty, llvm::ArrayRef(ArgTys), true); + Int32Ty, llvm::ArrayRef(ArgTys), false); return RValue::get( Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), llvm::ArrayRef(Args))); @@ -2667,14 +2690,12 @@ RValue CodeGenFunction::EmitBuiltinExpr( ArgTys.push_back(Int32Ty); Name = "__enqueue_kernel_events_vaargs"; - // Each of the following arguments specifies the size of the corresponding - // argument passed to the enqueued block. 
- for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I) -Args.push_back( -Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy)); + auto *PtrToSizeArray = CreateArrayForSizeVar(7); + Args.push_back(PtrToSizeArray); + ArgTys.push_back(PtrToSizeArray->getType()); llvm::FunctionType *FTy = llvm::FunctionType::get( - Int32Ty, llvm::ArrayRef(ArgTys), true); + Int32Ty, llvm::ArrayRef(ArgTys), false); return RValue::ge
r313171 - [AMDGPU] Change addr space of clk_event_t, queue_t and reserve_id_t to global
Author: yaxunl Date: Wed Sep 13 11:50:42 2017 New Revision: 313171 URL: http://llvm.org/viewvc/llvm-project?rev=313171&view=rev Log: [AMDGPU] Change addr space of clk_event_t, queue_t and reserve_id_t to global Differential Revision: https://reviews.llvm.org/D37703 Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.h cfe/trunk/test/CodeGenOpenCL/opencl_types.cl Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.h?rev=313171&r1=313170&r2=313171&view=diff == --- cfe/trunk/lib/Basic/Targets/AMDGPU.h (original) +++ cfe/trunk/lib/Basic/Targets/AMDGPU.h Wed Sep 13 11:50:42 2017 @@ -202,6 +202,10 @@ public: case BuiltinType::Id: \ return LangAS::opencl_constant; #include "clang/Basic/OpenCLImageTypes.def" +case BuiltinType::OCLClkEvent: +case BuiltinType::OCLQueue: +case BuiltinType::OCLReserveID: + return LangAS::opencl_global; default: return TargetInfo::getOpenCLTypeAddrSpace(T); Modified: cfe/trunk/test/CodeGenOpenCL/opencl_types.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/opencl_types.cl?rev=313171&r1=313170&r2=313171&view=diff == --- cfe/trunk/test/CodeGenOpenCL/opencl_types.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/opencl_types.cl Wed Sep 13 11:50:42 2017 @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -cl-std=CL2.0 %s -triple "spir-unknown-unknown" -emit-llvm -o - -O0 | FileCheck %s --check-prefix=CHECK-SPIR -// RUN: %clang_cc1 -cl-std=CL2.0 %s -triple "amdgcn--amdhsa" -emit-llvm -o - -O0 | FileCheck %s --check-prefix=CHECK-AMDGCN +// RUN: %clang_cc1 -cl-std=CL2.0 %s -triple "spir-unknown-unknown" -emit-llvm -o - -O0 | FileCheck %s --check-prefixes=CHECK-COM,CHECK-SPIR +// RUN: %clang_cc1 -cl-std=CL2.0 %s -triple "amdgcn--amdhsa" -emit-llvm -o - -O0 | FileCheck %s --check-prefixes=CHECK-COM,CHECK-AMDGCN #define CLK_ADDRESS_CLAMP_TO_EDGE 2 #define CLK_NORMALIZED_COORDS_TRUE 1 @@ -7,7 +7,7 @@ #define CLK_FILTER_LINEAR 0x20 constant sampler_t glb_smp = 
CLK_ADDRESS_CLAMP_TO_EDGE|CLK_NORMALIZED_COORDS_TRUE|CLK_FILTER_NEAREST; -// CHECK-SPIR-NOT: constant i32 +// CHECK-COM-NOT: constant i32 void fnc1(image1d_t img) {} // CHECK-SPIR: @fnc1(%opencl.image1d_ro_t addrspace(1)* @@ -39,20 +39,23 @@ void fnc4smp(sampler_t s) {} kernel void foo(image1d_t img) { sampler_t smp = CLK_ADDRESS_CLAMP_TO_EDGE|CLK_NORMALIZED_COORDS_TRUE|CLK_FILTER_LINEAR; - // CHECK-SPIR: alloca %opencl.sampler_t addrspace(2)* + // CHECK-COM: alloca %opencl.sampler_t addrspace(2)* event_t evt; - // CHECK-SPIR: alloca %opencl.event_t* + // CHECK-COM: alloca %opencl.event_t* clk_event_t clk_evt; // CHECK-SPIR: alloca %opencl.clk_event_t* + // CHECK-AMDGCN: alloca %opencl.clk_event_t addrspace(1)* queue_t queue; // CHECK-SPIR: alloca %opencl.queue_t* + // CHECK-AMDGCN: alloca %opencl.queue_t addrspace(1)* reserve_id_t rid; // CHECK-SPIR: alloca %opencl.reserve_id_t* - // CHECK-SPIR: store %opencl.sampler_t addrspace(2)* + // CHECK-AMDGCN: alloca %opencl.reserve_id_t addrspace(1)* + // CHECK-COM: store %opencl.sampler_t addrspace(2)* fnc4smp(smp); - // CHECK-SPIR: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)* + // CHECK-COM: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)* fnc4smp(glb_smp); - // CHECK-SPIR: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)* + // CHECK-COM: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)* } kernel void foo_pipe(read_only pipe int p) {} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r313172 - Add more tests for OpenCL atomic builtin functions
Author: yaxunl Date: Wed Sep 13 11:56:25 2017 New Revision: 313172 URL: http://llvm.org/viewvc/llvm-project?rev=313172&view=rev Log: Add more tests for OpenCL atomic builtin functions Add tests for different address spaces and insert some blank lines to make them more readable. Differential Revision: https://reviews.llvm.org/D37742 Modified: cfe/trunk/test/CodeGenOpenCL/atomic-ops-libcall.cl cfe/trunk/test/CodeGenOpenCL/atomic-ops.cl Modified: cfe/trunk/test/CodeGenOpenCL/atomic-ops-libcall.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/atomic-ops-libcall.cl?rev=313172&r1=313171&r2=313172&view=diff == --- cfe/trunk/test/CodeGenOpenCL/atomic-ops-libcall.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/atomic-ops-libcall.cl Wed Sep 13 11:56:25 2017 @@ -18,39 +18,64 @@ typedef enum memory_scope { #endif } memory_scope; -void f(atomic_int *i, atomic_uint *ui, int cmp, int order, int scope) { +void f(atomic_int *i, global atomic_int *gi, local atomic_int *li, private atomic_int *pi, atomic_uint *ui, int cmp, int order, int scope) { int x; // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8 addrspace(4)* {{%[0-9]+}}, i32 5, i32 1) // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8* {{%[0-9]+}}, i32 5, i32 1) x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group); + // SPIR: call void @__opencl_atomic_store_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1) // ARM: call void @__opencl_atomic_store_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1) __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group); + + // SPIR: %[[GP:[0-9]+]] = addrspacecast i8 addrspace(1)* {{%[0-9]+}} to i8 addrspace(4)* + // SPIR: call void @__opencl_atomic_store_4(i8 addrspace(4)* %[[GP]], i32 {{%[0-9]+}}, i32 5, i32 1) + // ARM: call void @__opencl_atomic_store_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1) + __opencl_atomic_store(gi, 1, memory_order_seq_cst, memory_scope_work_group); + + // SPIR: 
%[[GP:[0-9]+]] = addrspacecast i8 addrspace(3)* {{%[0-9]+}} to i8 addrspace(4)* + // SPIR: call void @__opencl_atomic_store_4(i8 addrspace(4)* %[[GP]], i32 {{%[0-9]+}}, i32 5, i32 1) + // ARM: call void @__opencl_atomic_store_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1) + __opencl_atomic_store(li, 1, memory_order_seq_cst, memory_scope_work_group); + + // SPIR: %[[GP:[0-9]+]] = addrspacecast i8* {{%[0-9]+}} to i8 addrspace(4)* + // SPIR: call void @__opencl_atomic_store_4(i8 addrspace(4)* %[[GP]], i32 {{%[0-9]+}}, i32 5, i32 1) + // ARM: call void @__opencl_atomic_store_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1) + __opencl_atomic_store(pi, 1, memory_order_seq_cst, memory_scope_work_group); + // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1) // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1) x = __opencl_atomic_fetch_add(i, 3, memory_order_seq_cst, memory_scope_work_group); + // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_min_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1) // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_min_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1) x = __opencl_atomic_fetch_min(i, 3, memory_order_seq_cst, memory_scope_work_group); + // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_umin_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1) // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_umin_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1) x = __opencl_atomic_fetch_min(ui, 3, memory_order_seq_cst, memory_scope_work_group); + // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1) // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1) x = 
__opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group); + // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1) // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1) x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group); + // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 2) // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[
r363076 - Revert r344630 Disable code object version 3 for HIP toolchain.
Author: yaxunl Date: Tue Jun 11 08:05:11 2019 New Revision: 363076 URL: http://llvm.org/viewvc/llvm-project?rev=363076&view=rev Log: Revert r344630 Disable code object version 3 for HIP toolchain. Remove the workaround so that by default code object v3 is enabled. Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/HIP.cpp?rev=363076&r1=363075&r2=363076&view=diff == --- cfe/trunk/lib/Driver/ToolChains/HIP.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/HIP.cpp Tue Jun 11 08:05:11 2019 @@ -127,7 +127,7 @@ const char *AMDGCN::Linker::constructLlc llvm::StringRef OutputFilePrefix, const char *InputFileName) const { // Construct llc command. ArgStringList LlcArgs{InputFileName, "-mtriple=amdgcn-amd-amdhsa", -"-filetype=obj", "-mattr=-code-object-v3", +"-filetype=obj", Args.MakeArgString("-mcpu=" + SubArchName)}; // Extract all the -m options ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r363414 - [AMDGPU] Enable the implicit arguments for HIP (CLANG)
Author: yaxunl Date: Fri Jun 14 08:54:47 2019 New Revision: 363414 URL: http://llvm.org/viewvc/llvm-project?rev=363414&view=rev Log: [AMDGPU] Enable the implicit arguments for HIP (CLANG) Enable 48 bytes of implicit arguments for HIP as well. Earlier it was enabled for OpenCL. This code is specific to the AMDGPU target. Differential Revision: https://reviews.llvm.org/D62244 Added: cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=363414&r1=363413&r2=363414&view=diff == --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original) +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Fri Jun 14 08:54:47 2019 @@ -7868,7 +7868,8 @@ void AMDGPUTargetCodeGenInfo::setTargetA const auto *ReqdWGS = M.getLangOpts().OpenCL ? FD->getAttr() : nullptr; - if (M.getLangOpts().OpenCL && FD->hasAttr() && + if (((M.getLangOpts().OpenCL && FD->hasAttr()) || + (M.getLangOpts().HIP && FD->hasAttr())) && (M.getTriple().getOS() == llvm::Triple::AMDHSA)) F->addFnAttr("amdgpu-implicitarg-num-bytes", "48"); Added: cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu?rev=363414&view=auto == --- cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu (added) +++ cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu Fri Jun 14 08:54:47 2019 @@ -0,0 +1,8 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip -o - %s | FileCheck %s +#include "Inputs/cuda.h" + +__global__ void hip_kernel_temp() { +} + +// CHECK: attributes {{.*}} = {{.*}} "amdgpu-implicitarg-num-bytes"="48" ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r364380 - Fix build failure due to missing break
Author: yaxunl Date: Tue Jun 25 20:33:03 2019 New Revision: 364380 URL: http://llvm.org/viewvc/llvm-project?rev=364380&view=rev Log: Fix build failure due to missing break Modified: cfe/trunk/lib/Basic/Targets/ARM.cpp Modified: cfe/trunk/lib/Basic/Targets/ARM.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/ARM.cpp?rev=364380&r1=364379&r2=364380&view=diff == --- cfe/trunk/lib/Basic/Targets/ARM.cpp (original) +++ cfe/trunk/lib/Basic/Targets/ARM.cpp Tue Jun 25 20:33:03 2019 @@ -910,6 +910,7 @@ bool ARMTargetInfo::validateAsmConstrain Name++; return true; } +break; case 'U': // a memory reference... switch (Name[1]) { case 'q': // ...ARMV4 ldrsb @@ -925,6 +926,7 @@ bool ARMTargetInfo::validateAsmConstrain Name++; return true; } +break; } return false; } ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r364381 - [HIP] Support attribute hip_pinned_shadow
Author: yaxunl Date: Tue Jun 25 20:47:37 2019 New Revision: 364381 URL: http://llvm.org/viewvc/llvm-project?rev=364381&view=rev Log: [HIP] Support attribute hip_pinned_shadow This patch introduces support of hip_pinned_shadow variable for HIP. A hip_pinned_shadow variable is a global variable with attribute hip_pinned_shadow. It has external linkage on device side and has no initializer. It has internal linkage on host side and has initializer or static constructor. It can be accessed in both device code and host code. This allows HIP runtime to implement support of HIP texture reference. Differential Revision: https://reviews.llvm.org/D62738 Added: cfe/trunk/test/AST/ast-dump-hip-pinned-shadow.cu cfe/trunk/test/CodeGenCUDA/hip-pinned-shadow.cu cfe/trunk/test/SemaCUDA/hip-pinned-shadow.cu Modified: cfe/trunk/include/clang/Basic/Attr.td cfe/trunk/include/clang/Basic/AttrDocs.td cfe/trunk/lib/CodeGen/CodeGenModule.cpp cfe/trunk/lib/CodeGen/TargetInfo.cpp cfe/trunk/lib/Driver/ToolChains/HIP.cpp cfe/trunk/lib/Sema/SemaDeclAttr.cpp cfe/trunk/test/Driver/hip-toolchain-no-rdc.hip cfe/trunk/test/Driver/hip-toolchain-rdc.hip cfe/trunk/test/Misc/pragma-attribute-supported-attributes-list.test Modified: cfe/trunk/include/clang/Basic/Attr.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Attr.td?rev=364381&r1=364380&r2=364381&view=diff == --- cfe/trunk/include/clang/Basic/Attr.td (original) +++ cfe/trunk/include/clang/Basic/Attr.td Tue Jun 25 20:47:37 2019 @@ -295,6 +295,7 @@ class LangOpt; def Borland : LangOpt<"Borland">; def CUDA : LangOpt<"CUDA">; +def HIP : LangOpt<"HIP">; def COnly : LangOpt<"COnly", "!LangOpts.CPlusPlus">; def CPlusPlus : LangOpt<"CPlusPlus">; def OpenCL : LangOpt<"OpenCL">; @@ -957,6 +958,13 @@ def CUDADevice : InheritableAttr { let Documentation = [Undocumented]; } +def HIPPinnedShadow : InheritableAttr { + let Spellings = [GNU<"hip_pinned_shadow">, Declspec<"__hip_pinned_shadow__">]; + let Subjects = SubjectList<[Var]>; + let 
LangOpts = [HIP]; + let Documentation = [HIPPinnedShadowDocs]; +} + def CUDADeviceBuiltin : IgnoredAttr { let Spellings = [GNU<"device_builtin">, Declspec<"__device_builtin__">]; let LangOpts = [CUDA]; Modified: cfe/trunk/include/clang/Basic/AttrDocs.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/AttrDocs.td?rev=364381&r1=364380&r2=364381&view=diff == --- cfe/trunk/include/clang/Basic/AttrDocs.td (original) +++ cfe/trunk/include/clang/Basic/AttrDocs.td Tue Jun 25 20:47:37 2019 @@ -4183,3 +4183,15 @@ This attribute does not affect optimizat ``__attribute__((malloc))``. }]; } + +def HIPPinnedShadowDocs : Documentation { + let Category = DocCatType; + let Content = [{ +The GNU style attribute __attribute__((hip_pinned_shadow)) or MSVC style attribute +__declspec(hip_pinned_shadow) can be added to the definition of a global variable +to indicate it is a HIP pinned shadow variable. A HIP pinned shadow variable can +be accessed on both device side and host side. It has external linkage and is +not initialized on device side. It has internal linkage and is initialized by +the initializer on host side. 
+ }]; +} \ No newline at end of file Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=364381&r1=364380&r2=364381&view=diff == --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original) +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Tue Jun 25 20:47:37 2019 @@ -2415,7 +2415,8 @@ void CodeGenModule::EmitGlobal(GlobalDec if (!Global->hasAttr() && !Global->hasAttr() && !Global->hasAttr() && - !Global->hasAttr()) + !Global->hasAttr() && + !(LangOpts.HIP && Global->hasAttr())) return; } else { // We need to emit host-side 'shadows' for all global @@ -3781,7 +3782,12 @@ void CodeGenModule::EmitGlobalVarDefinit !getLangOpts().CUDAIsDevice && (D->hasAttr() || D->hasAttr() || D->hasAttr()); - if (getLangOpts().CUDA && (IsCUDASharedVar || IsCUDAShadowVar)) + // HIP pinned shadow of initialized host-side global variables are also + // left undefined. + bool IsHIPPinnedShadowVar = + getLangOpts().CUDAIsDevice && D->hasAttr(); + if (getLangOpts().CUDA && + (IsCUDASharedVar || IsCUDAShadowVar || IsHIPPinnedShadowVar)) Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); else if (!InitExpr) { // This is a tentative definition; tentative definitions are @@ -3892,7 +3898,8 @@ void CodeGenModule::EmitGlobalVarDefinit // global variables become internal definitions. These have to // be internal in order to pre
r365799 - [HIP] Add GPU arch gfx1010, gfx1011, and gfx1012
Author: yaxunl Date: Thu Jul 11 10:50:09 2019 New Revision: 365799 URL: http://llvm.org/viewvc/llvm-project?rev=365799&view=rev Log: [HIP] Add GPU arch gfx1010, gfx1011, and gfx1012 Differential Revision: https://reviews.llvm.org/D64364 Modified: cfe/trunk/include/clang/Basic/Cuda.h cfe/trunk/lib/Basic/Cuda.cpp cfe/trunk/lib/Basic/Targets/NVPTX.cpp cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Modified: cfe/trunk/include/clang/Basic/Cuda.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Cuda.h?rev=365799&r1=365798&r2=365799&view=diff == --- cfe/trunk/include/clang/Basic/Cuda.h (original) +++ cfe/trunk/include/clang/Basic/Cuda.h Thu Jul 11 10:50:09 2019 @@ -66,6 +66,9 @@ enum class CudaArch { GFX906, GFX908, GFX909, + GFX1010, + GFX1011, + GFX1012, LAST, }; const char *CudaArchToString(CudaArch A); Modified: cfe/trunk/lib/Basic/Cuda.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Cuda.cpp?rev=365799&r1=365798&r2=365799&view=diff == --- cfe/trunk/lib/Basic/Cuda.cpp (original) +++ cfe/trunk/lib/Basic/Cuda.cpp Thu Jul 11 10:50:09 2019 @@ -113,6 +113,12 @@ const char *CudaArchToString(CudaArch A) return "gfx908"; case CudaArch::GFX909: // TBA return "gfx909"; + case CudaArch::GFX1010: // TBA +return "gfx1010"; + case CudaArch::GFX1011: // TBA +return "gfx1011"; + case CudaArch::GFX1012: // TBA +return "gfx1012"; } llvm_unreachable("invalid enum"); } @@ -151,6 +157,9 @@ CudaArch StringToCudaArch(llvm::StringRe .Case("gfx906", CudaArch::GFX906) .Case("gfx908", CudaArch::GFX908) .Case("gfx909", CudaArch::GFX909) + .Case("gfx1010", CudaArch::GFX1010) + .Case("gfx1011", CudaArch::GFX1011) + .Case("gfx1012", CudaArch::GFX1012) .Default(CudaArch::UNKNOWN); } @@ -264,6 +273,9 @@ CudaVirtualArch VirtualArchForCudaArch(C case CudaArch::GFX906: case CudaArch::GFX908: case CudaArch::GFX909: + case CudaArch::GFX1010: + case CudaArch::GFX1011: + case CudaArch::GFX1012: return CudaVirtualArch::COMPUTE_AMDGCN; } llvm_unreachable("invalid enum"); 
@@ -312,6 +324,9 @@ CudaVersion MinVersionForCudaArch(CudaAr case CudaArch::GFX906: case CudaArch::GFX908: case CudaArch::GFX909: + case CudaArch::GFX1010: + case CudaArch::GFX1011: + case CudaArch::GFX1012: return CudaVersion::CUDA_70; } llvm_unreachable("invalid enum"); @@ -336,6 +351,9 @@ CudaVersion MaxVersionForCudaArch(CudaAr case CudaArch::GFX810: case CudaArch::GFX900: case CudaArch::GFX902: + case CudaArch::GFX1010: + case CudaArch::GFX1011: + case CudaArch::GFX1012: return CudaVersion::CUDA_80; default: return CudaVersion::LATEST; Modified: cfe/trunk/lib/Basic/Targets/NVPTX.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/NVPTX.cpp?rev=365799&r1=365798&r2=365799&view=diff == --- cfe/trunk/lib/Basic/Targets/NVPTX.cpp (original) +++ cfe/trunk/lib/Basic/Targets/NVPTX.cpp Thu Jul 11 10:50:09 2019 @@ -193,6 +193,9 @@ void NVPTXTargetInfo::getTargetDefines(c case CudaArch::GFX906: case CudaArch::GFX908: case CudaArch::GFX909: + case CudaArch::GFX1010: + case CudaArch::GFX1011: + case CudaArch::GFX1012: case CudaArch::LAST: break; case CudaArch::UNKNOWN: Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=365799&r1=365798&r2=365799&view=diff == --- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original) +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Thu Jul 11 10:50:09 2019 @@ -4930,6 +4930,9 @@ void CGOpenMPRuntimeNVPTX::checkArchForU case CudaArch::GFX906: case CudaArch::GFX908: case CudaArch::GFX909: + case CudaArch::GFX1010: + case CudaArch::GFX1011: + case CudaArch::GFX1012: case CudaArch::UNKNOWN: break; case CudaArch::LAST: @@ -4985,6 +4988,9 @@ static std::pair get case CudaArch::GFX906: case CudaArch::GFX908: case CudaArch::GFX909: + case CudaArch::GFX1010: + case CudaArch::GFX1011: + case CudaArch::GFX1012: case CudaArch::UNKNOWN: break; case CudaArch::LAST: ___ cfe-commits mailing list cfe-commits@lists.llvm.org 
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r361757 - [OpenCL] Fix file-scope const sampler variable for 2.0
Author: yaxunl Date: Mon May 27 04:19:07 2019 New Revision: 361757 URL: http://llvm.org/viewvc/llvm-project?rev=361757&view=rev Log: [OpenCL] Fix file-scope const sampler variable for 2.0 OpenCL spec v2.0 s6.13.14: Samplers can also be declared as global constants in the program source using the following syntax. const sampler_t <sampler name> = <value> This works fine for OpenCL 1.2 but fails for 2.0, because clang deduces address space of file-scope const sampler variable to be in global address space whereas spec v2.0 s6.9.b forbids file-scope sampler variable to be in global address space. The fix is not to deduce address space for file-scope sampler variables. Differential Revision: https://reviews.llvm.org/D62197 Modified: cfe/trunk/lib/Sema/SemaType.cpp cfe/trunk/test/CodeGenOpenCL/sampler.cl cfe/trunk/test/SemaOpenCL/sampler_t.cl Modified: cfe/trunk/lib/Sema/SemaType.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaType.cpp?rev=361757&r1=361756&r2=361757&view=diff == --- cfe/trunk/lib/Sema/SemaType.cpp (original) +++ cfe/trunk/lib/Sema/SemaType.cpp Mon May 27 04:19:07 2019 @@ -7363,7 +7363,21 @@ static void deduceOpenCLImplicitAddrSpac T->isDependentType() || // Do not deduce addr space of decltype because it will be taken from // its argument. - T->isDecltypeType()) + T->isDecltypeType() || + // OpenCL spec v2.0 s6.9.b: + // The sampler type cannot be used with the __local and __global address + // space qualifiers. + // OpenCL spec v2.0 s6.13.14: + // Samplers can also be declared as global constants in the program + // source using the following syntax. + // const sampler_t <sampler name> = <value> + // In codegen, file-scope sampler type variable has special handing and + // does not rely on address space qualifier. On the other hand, deducing + // address space of const sampler file-scope variable as global address + // space causes spurious diagnostic about __global address space + // qualifier, therefore do not deduce address space of file-scope sampler + // type variable. 
+ (D.getContext() == DeclaratorContext::FileContext && T->isSamplerT())) return; LangAS ImpAddr = LangAS::Default; Modified: cfe/trunk/test/CodeGenOpenCL/sampler.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/sampler.cl?rev=361757&r1=361756&r2=361757&view=diff == --- cfe/trunk/test/CodeGenOpenCL/sampler.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/sampler.cl Mon May 27 04:19:07 2019 @@ -1,4 +1,5 @@ // RUN: %clang_cc1 %s -emit-llvm -triple spir-unknown-unknown -o - -O0 | FileCheck %s +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -triple spir-unknown-unknown -o - -O0 | FileCheck %s // // This test covers 5 cases of sampler initialzation: // 1. function argument passing @@ -6,8 +7,9 @@ // 1b. argument is a function-scope variable // 1c. argument is one of caller function's parameters // 2. variable initialization -// 2a. initializing a file-scope variable +// 2a. initializing a file-scope variable with constant addr space qualifier // 2b. initializing a function-scope variable +// 2c. 
initializing a file-scope variable with const qualifier #define CLK_ADDRESS_CLAMP_TO_EDGE 2 #define CLK_NORMALIZED_COORDS_TRUE 1 @@ -20,6 +22,10 @@ constant sampler_t glb_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR; // CHECK-NOT: glb_smp +// Case 2c +const sampler_t glb_smp_const = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR; +// CHECK-NOT: glb_smp_const + int get_sampler_initializer(void); void fnc4smp(sampler_t s) {} @@ -47,11 +53,16 @@ kernel void foo(sampler_t smp_par) { // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_ptr]] // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]]) - // Case 1a + // Case 1a/2a fnc4smp(glb_smp); // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]]) + // Case 1a/2c + fnc4smp(glb_smp_const); + // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) + // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]]) + // Case 1c fnc4smp(smp_par); // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_par_ptr]] Modified: cfe/trunk/test/SemaOpenCL/sampler_t.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/sampler_t.cl?rev=361757&r1=361756&r2=361757&view=diff =
r361880 - [CUDA][HIP] Emit dependent libs for host only
Author: yaxunl Date: Tue May 28 14:18:59 2019 New Revision: 361880 URL: http://llvm.org/viewvc/llvm-project?rev=361880&view=rev Log: [CUDA][HIP] Emit dependent libs for host only Recently D60274 was introduced to allow lld to handle dependent libs. However current usage of dependent libs (e.g. pragma comment(lib, *) in windows header files) is intended for host only. Emitting the metadata in device IR causes link error in device path. Until there is a way to differentiate dependent libs for device or host, metadata for dependent libs should be emitted for host only. This patch enforces that. Differential Revision: https://reviews.llvm.org/D62483 Added: cfe/trunk/test/CodeGenCUDA/dependent-libs.cu Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=361880&r1=361879&r2=361880&view=diff == --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original) +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Tue May 28 14:18:59 2019 @@ -457,7 +457,12 @@ void CodeGenModule::Release() { // that ELF linkers tend to handle libraries in a more complicated fashion // than on other platforms. This forces us to defer handling the dependent // libs to the linker. - if (!ELFDependentLibraries.empty()) { + // + // CUDA/HIP device and host libraries are different. Currently there is no + // way to differentiate dependent libraries for host or device. Existing + // usage of #pragma comment(lib, *) is intended for host libraries on + // Windows. Therefore emit llvm.dependent-libraries only for host. 
+ if (!ELFDependentLibraries.empty() && !Context.getLangOpts().CUDAIsDevice) { auto *NMD = getModule().getOrInsertNamedMetadata("llvm.dependent-libraries"); for (auto *MD : ELFDependentLibraries) NMD->addOperand(MD); Added: cfe/trunk/test/CodeGenCUDA/dependent-libs.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/dependent-libs.cu?rev=361880&view=auto == --- cfe/trunk/test/CodeGenCUDA/dependent-libs.cu (added) +++ cfe/trunk/test/CodeGenCUDA/dependent-libs.cu Tue May 28 14:18:59 2019 @@ -0,0 +1,6 @@ +// RUN: %clang_cc1 -emit-llvm -o - -fcuda-is-device -x hip %s | FileCheck --check-prefix=DEV %s +// RUN: %clang_cc1 -emit-llvm -o - -x hip %s | FileCheck --check-prefix=HOST %s + +// DEV-NOT: llvm.dependent-libraries +// HOST: llvm.dependent-libraries +#pragma comment(lib, "libabc") ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r361905 - Fix failure of lit test dependent-libs.cu
Author: yaxunl Date: Tue May 28 18:34:44 2019 New Revision: 361905 URL: http://llvm.org/viewvc/llvm-project?rev=361905&view=rev Log: Fix failure of lit test dependent-libs.cu Modified: cfe/trunk/test/CodeGenCUDA/dependent-libs.cu Modified: cfe/trunk/test/CodeGenCUDA/dependent-libs.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/dependent-libs.cu?rev=361905&r1=361904&r2=361905&view=diff == --- cfe/trunk/test/CodeGenCUDA/dependent-libs.cu (original) +++ cfe/trunk/test/CodeGenCUDA/dependent-libs.cu Tue May 28 18:34:44 2019 @@ -1,5 +1,7 @@ -// RUN: %clang_cc1 -emit-llvm -o - -fcuda-is-device -x hip %s | FileCheck --check-prefix=DEV %s -// RUN: %clang_cc1 -emit-llvm -o - -x hip %s | FileCheck --check-prefix=HOST %s +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - -fcuda-is-device -x hip %s | FileCheck --check-prefix=DEV %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - -x hip %s | FileCheck --check-prefix=HOST %s // DEV-NOT: llvm.dependent-libraries // HOST: llvm.dependent-libraries ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r350885 - [HIP] Use nul instead of /dev/null when running on windows
Author: yaxunl Date: Thu Jan 10 12:09:52 2019 New Revision: 350885 URL: http://llvm.org/viewvc/llvm-project?rev=350885&view=rev Log: [HIP] Use nul instead of /dev/null when running on windows When clang is running on windows, /dev/null is not available. Use nul as empty input file instead. Differential Revision: https://reviews.llvm.org/D56225 Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/HIP.cpp?rev=350885&r1=350884&r2=350885&view=diff == --- cfe/trunk/lib/Driver/ToolChains/HIP.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/HIP.cpp Thu Jan 10 12:09:52 2019 @@ -24,6 +24,12 @@ using namespace clang::driver::tools; using namespace clang; using namespace llvm::opt; +#if _WIN32 || _WIN64 +#define NULL_FILE "nul" +#else +#define NULL_FILE "/dev/null" +#endif + namespace { static void addBCLib(Compilation &C, const ArgList &Args, @@ -197,7 +203,7 @@ void AMDGCN::constructHIPFatbinCommand(C // ToDo: Remove the dummy host binary entry which is required by // clang-offload-bundler. std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux"; - std::string BundlerInputArg = "-inputs=/dev/null"; + std::string BundlerInputArg = "-inputs=" NULL_FILE; for (const auto &II : Inputs) { const auto* A = II.getAction(); ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r372394 - [CUDA][HIP] Fix hostness of defaulted constructor
Author: yaxunl Date: Fri Sep 20 07:28:09 2019 New Revision: 372394 URL: http://llvm.org/viewvc/llvm-project?rev=372394&view=rev Log: [CUDA][HIP] Fix hostness of defaulted constructor Clang does not respect the explicit device host attributes of defaulted special members. Also clang does not respect the hostness of special members determined by their first declarations. Clang also adds duplicate implicit device or host attributes in certain cases. This patch fixes that. Differential Revision: https://reviews.llvm.org/D67509 Added: cfe/trunk/test/SemaCUDA/default-ctor.cu Modified: cfe/trunk/lib/Sema/SemaCUDA.cpp Modified: cfe/trunk/lib/Sema/SemaCUDA.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaCUDA.cpp?rev=372394&r1=372393&r2=372394&view=diff == --- cfe/trunk/lib/Sema/SemaCUDA.cpp (original) +++ cfe/trunk/lib/Sema/SemaCUDA.cpp Fri Sep 20 07:28:09 2019 @@ -267,6 +267,18 @@ bool Sema::inferCUDATargetForImplicitSpe CXXMethodDecl *MemberDecl, bool ConstRHS, bool Diagnose) { + // If the defaulted special member is defined lexically outside of its + // owning class, or the special member already has explicit device or host + // attributes, do not infer. + bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent(); + bool HasH = MemberDecl->hasAttr(); + bool HasD = MemberDecl->hasAttr(); + bool HasExplicitAttr = + (HasD && !MemberDecl->getAttr()->isImplicit()) || + (HasH && !MemberDecl->getAttr()->isImplicit()); + if (!InClass || HasExplicitAttr) +return false; + llvm::Optional InferredTarget; // We're going to invoke special member lookup; mark that these special @@ -371,21 +383,24 @@ bool Sema::inferCUDATargetForImplicitSpe } } + + // If no target was inferred, mark this member as __host__ __device__; + // it's the least restrictive option that can be invoked from any target. 
+ bool NeedsH = true, NeedsD = true; if (InferredTarget.hasValue()) { -if (InferredTarget.getValue() == CFT_Device) { - MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); -} else if (InferredTarget.getValue() == CFT_Host) { - MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); -} else { - MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); - MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); -} - } else { -// If no target was inferred, mark this member as __host__ __device__; -// it's the least restrictive option that can be invoked from any target. +if (InferredTarget.getValue() == CFT_Device) + NeedsH = false; +else if (InferredTarget.getValue() == CFT_Host) + NeedsD = false; + } + + // We either setting attributes first time, or the inferred ones must match + // previously set ones. + assert(!(HasD || HasH) || (NeedsD == HasD && NeedsH == HasH)); + if (NeedsD && !HasD) MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); + if (NeedsH && !HasH) MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); - } return false; } Added: cfe/trunk/test/SemaCUDA/default-ctor.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/default-ctor.cu?rev=372394&view=auto == --- cfe/trunk/test/SemaCUDA/default-ctor.cu (added) +++ cfe/trunk/test/SemaCUDA/default-ctor.cu Fri Sep 20 07:28:09 2019 @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 -std=c++11 -triple nvptx64-nvidia-cuda -fsyntax-only \ +// RUN:-fcuda-is-device -verify -verify-ignore-unexpected=note %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fsyntax-only \ +// RUN:-verify -verify-ignore-unexpected=note %s + +#include "Inputs/cuda.h" + +struct In { In() = default; }; +struct InD { __device__ InD() = default; }; +struct InH { __host__ InH() = default; }; +struct InHD { __host__ __device__ InHD() = default; }; + +struct Out { Out(); }; +struct OutD { __device__ OutD(); }; +struct OutH { __host__ OutH(); }; +struct OutHD { __host__ __device__ OutHD(); 
}; + +Out::Out() = default; +__device__ OutD::OutD() = default; +__host__ OutH::OutH() = default; +__host__ __device__ OutHD::OutHD() = default; + +__device__ void fd() { + In in; + InD ind; + InH inh; // expected-error{{no matching constructor for initialization of 'InH'}} + InHD inhd; + Out out; // expected-error{{no matching constructor for initialization of 'Out'}} + OutD outd; + OutH outh; // expected-error{{no matching constructor for initialization of 'OutH'}} + OutHD outhd; +} + +__host__ void fh() { + In in; + InD ind; // expected-error{{no matching constructor for initialization of 'InD'}} + InH inh; + InHD inhd; + Out out; + OutD outd; // expected
r372452 - Revert assertion added by r372394
Author: yaxunl Date: Fri Sep 20 19:51:44 2019 New Revision: 372452 URL: http://llvm.org/viewvc/llvm-project?rev=372452&view=rev Log: Revert assertion added by r372394 The assertion added by r372394 causes CUDA test in test-suite to assert. The assertion was not there originally, so revert it. Modified: cfe/trunk/lib/Sema/SemaCUDA.cpp Modified: cfe/trunk/lib/Sema/SemaCUDA.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaCUDA.cpp?rev=372452&r1=372451&r2=372452&view=diff == --- cfe/trunk/lib/Sema/SemaCUDA.cpp (original) +++ cfe/trunk/lib/Sema/SemaCUDA.cpp Fri Sep 20 19:51:44 2019 @@ -396,7 +396,6 @@ bool Sema::inferCUDATargetForImplicitSpe // We either setting attributes first time, or the inferred ones must match // previously set ones. - assert(!(HasD || HasH) || (NeedsD == HasD && NeedsH == HasH)); if (NeedsD && !HasD) MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); if (NeedsH && !HasH) ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r372773 - [HIP] Support new kernel launching API
Author: yaxunl Date: Tue Sep 24 12:16:40 2019 New Revision: 372773 URL: http://llvm.org/viewvc/llvm-project?rev=372773&view=rev Log: [HIP] Support new kernel launching API Differential Revision: https://reviews.llvm.org/D67947 Modified: cfe/trunk/include/clang/Basic/LangOptions.def cfe/trunk/include/clang/Driver/Options.td cfe/trunk/lib/CodeGen/CGCUDANV.cpp cfe/trunk/lib/Driver/ToolChains/Clang.cpp cfe/trunk/lib/Frontend/CompilerInvocation.cpp cfe/trunk/lib/Sema/SemaCUDA.cpp cfe/trunk/test/CodeGenCUDA/Inputs/cuda.h cfe/trunk/test/CodeGenCUDA/kernel-call.cu Modified: cfe/trunk/include/clang/Basic/LangOptions.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/LangOptions.def?rev=372773&r1=372772&r2=372773&view=diff == --- cfe/trunk/include/clang/Basic/LangOptions.def (original) +++ cfe/trunk/include/clang/Basic/LangOptions.def Tue Sep 24 12:16:40 2019 @@ -226,6 +226,8 @@ LANGOPT(GPURelocatableDeviceCode, 1, 0, LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device") +LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP") + LANGOPT(SizedDeallocation , 1, 0, "sized deallocation") LANGOPT(AlignedAllocation , 1, 0, "aligned allocation") LANGOPT(AlignedAllocationUnavailable, 1, 0, "aligned allocation functions are unavailable") Modified: cfe/trunk/include/clang/Driver/Options.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=372773&r1=372772&r2=372773&view=diff == --- cfe/trunk/include/clang/Driver/Options.td (original) +++ cfe/trunk/include/clang/Driver/Options.td Tue Sep 24 12:16:40 2019 @@ -599,6 +599,9 @@ def hip_device_lib_EQ : Joined<["--"], " HelpText<"HIP device library">; def fhip_dump_offload_linker_script : Flag<["-"], "fhip-dump-offload-linker-script">, Group, Flags<[NoArgumentUnused, HelpHidden]>; +def fhip_new_launch_api : Flag<["-"], "fhip-new-launch-api">, + Flags<[CC1Option]>, HelpText<"Use new kernel launching API for HIP.">; +def fno_hip_new_launch_api : Flag<["-"], 
"fno-hip-new-launch-api">; def libomptarget_nvptx_path_EQ : Joined<["--"], "libomptarget-nvptx-path=">, Group, HelpText<"Path to libomptarget-nvptx libraries">; def dD : Flag<["-"], "dD">, Group, Flags<[CC1Option]>, Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=372773&r1=372772&r2=372773&view=diff == --- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Tue Sep 24 12:16:40 2019 @@ -236,7 +236,8 @@ void CGNVCUDARuntime::emitDeviceStub(Cod EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl}); if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), - CudaFeature::CUDA_USES_NEW_LAUNCH)) + CudaFeature::CUDA_USES_NEW_LAUNCH) || + CGF.getLangOpts().HIPUseNewLaunchAPI) emitDeviceStubBodyNew(CGF, Args); else emitDeviceStubBodyLegacy(CGF, Args); @@ -264,14 +265,18 @@ void CGNVCUDARuntime::emitDeviceStubBody llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); - // Lookup cudaLaunchKernel function. + // Lookup cudaLaunchKernel/hipLaunchKernel function. 
// cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, // void **args, size_t sharedMem, // cudaStream_t stream); + // hipError_t hipLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, + //void **args, size_t sharedMem, + //hipStream_t stream); TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + auto LaunchKernelName = addPrefixToName("LaunchKernel"); IdentifierInfo &cudaLaunchKernelII = - CGM.getContext().Idents.get("cudaLaunchKernel"); + CGM.getContext().Idents.get(LaunchKernelName); FunctionDecl *cudaLaunchKernelFD = nullptr; for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) { if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result)) @@ -280,7 +285,7 @@ void CGNVCUDARuntime::emitDeviceStubBody if (cudaLaunchKernelFD == nullptr) { CGM.Error(CGF.CurFuncDecl->getLocation(), - "Can't find declaration for cudaLaunchKernel()"); + "Can't find declaration for " + LaunchKernelName); return; } // Create temporary dim3 grid_dim, block_dim. @@ -301,7 +306,7 @@ void CGNVCUDARuntime::emitDeviceStubBody /*ShmemSize=*/ShmemSize.getType(), /*Stream=*/Stream.getType()}, /*isVarArg=*/false), - "__cudaPopCallConfigura
r373561 - [HIP] Support -emit-llvm for device compilation
Author: yaxunl Date: Wed Oct 2 20:27:43 2019 New Revision: 373561 URL: http://llvm.org/viewvc/llvm-project?rev=373561&view=rev Log: [HIP] Support -emit-llvm for device compilation Sometimes it is useful to compile HIP device code to LLVM BC. It is not convenient to use clang -cc1 since there are lots of options needed. This patch allows clang driver to compile HIP device code to LLVM BC with -emit-llvm -c. Differential Revision: https://reviews.llvm.org/D68284 Added: cfe/trunk/test/Driver/hip-device-compile.hip Modified: cfe/trunk/lib/Driver/Driver.cpp Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=373561&r1=373560&r2=373561&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Wed Oct 2 20:27:43 2019 @@ -2312,6 +2312,8 @@ class OffloadingActionBuilder final { /// compilation. bool CompileHostOnly = false; bool CompileDeviceOnly = false; +bool EmitLLVM = false; +bool EmitAsm = false; /// List of GPU architectures to use in this compilation. SmallVector GpuArchList; @@ -2478,6 +2480,8 @@ class OffloadingActionBuilder final { CompileDeviceOnly = PartialCompilationArg && PartialCompilationArg->getOption().matches( options::OPT_cuda_device_only); + EmitLLVM = Args.getLastArg(options::OPT_emit_llvm); + EmitAsm = Args.getLastArg(options::OPT_S); // Collect all cuda_gpu_arch parameters, removing duplicates. std::set GpuArchs; @@ -2664,7 +2668,8 @@ class OffloadingActionBuilder final { assert(!CompileHostOnly && "Not expecting CUDA actions in host-only compilation."); - if (!Relocatable && CurPhase == phases::Backend) { + if (!Relocatable && CurPhase == phases::Backend && !EmitLLVM && + !EmitAsm) { // If we are in backend phase, we attempt to generate the fat binary. // We compile each arch to IR and use a link action to generate code // object containing ISA. 
Then we use a special "link" action to create @@ -2732,7 +2737,8 @@ class OffloadingActionBuilder final { A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A, AssociatedOffloadKind); - return ABRT_Success; + return (CompileDeviceOnly && CurPhase == FinalPhase) ? ABRT_Ignore_Host + : ABRT_Success; } void appendLinkDependences(OffloadAction::DeviceDependences &DA) override { Added: cfe/trunk/test/Driver/hip-device-compile.hip URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-device-compile.hip?rev=373561&view=auto == --- cfe/trunk/test/Driver/hip-device-compile.hip (added) +++ cfe/trunk/test/Driver/hip-device-compile.hip Wed Oct 2 20:27:43 2019 @@ -0,0 +1,72 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// If -emit-llvm and/or -S is used in device only compilation, +// the output should not be bundled. + +// RUN: %clang -c -emit-llvm --cuda-device-only -### -target x86_64-linux-gnu \ +// RUN: -o a.bc -x hip --cuda-gpu-arch=gfx900 \ +// RUN: --hip-device-lib=lib1.bc \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ +// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,BC %s + +// RUN: %clang -c -S -emit-llvm --cuda-device-only -### -target x86_64-linux-gnu \ +// RUN: -o a.ll -x hip --cuda-gpu-arch=gfx900 \ +// RUN: --hip-device-lib=lib1.bc \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ +// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,LL %s + +// RUN: %clang -c -S --cuda-device-only -### -target x86_64-linux-gnu \ +// RUN: -o a.s -x hip --cuda-gpu-arch=gfx900 \ +// RUN: --hip-device-lib=lib1.bc \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ +// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,ASM %s + +// CHECK: {{".*clang.*"}} "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-aux-triple" 
"x86_64-unknown-linux-gnu" +// BC-SAME: "-emit-llvm-bc" +// LL-SAME: "-emit-llvm" +// ASM-NOT: "-emit-llvm" +// CHECK-SAME: "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900" +// CHECK-SAME: "-fcuda-is-device" +// CHECK-SAME: {{".*lib1.bc"}} +// BC-SAME: "-o" "a.bc" +// LL-SAME: "-o" "a.ll" +// ASM-SAME: "-o" "a.s" +// CHECK-SAME: {{".*a.cu"}} + +// CHECK-NOT: {{"*.llvm-link"}} +// CHECK-NOT: {{".*opt"}} +// CHECK-NOT: {{".*llc"}} +// CHECK-NOT: {{".*lld"}} +// CHECK-NOT: {{".*clang-offload-bundler"}} +// CHECK-NOT: {{".*ld.*"}} + +// If neither -emit-llvm nor
r373649 - [HIP] Use option -nogpulib to disable linking device lib
Author: yaxunl Date: Thu Oct 3 11:59:56 2019 New Revision: 373649 URL: http://llvm.org/viewvc/llvm-project?rev=373649&view=rev Log: [HIP] Use option -nogpulib to disable linking device lib Differential Revision: https://reviews.llvm.org/D68300 Added: cfe/trunk/test/Driver/hip-no-device-libs.hip Modified: cfe/trunk/include/clang/Driver/Options.td cfe/trunk/lib/Driver/ToolChains/Cuda.cpp cfe/trunk/lib/Driver/ToolChains/HIP.cpp Modified: cfe/trunk/include/clang/Driver/Options.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=373649&r1=373648&r2=373649&view=diff == --- cfe/trunk/include/clang/Driver/Options.td (original) +++ cfe/trunk/include/clang/Driver/Options.td Thu Oct 3 11:59:56 2019 @@ -2556,7 +2556,9 @@ def no__dead__strip__inits__and__terms : def nobuiltininc : Flag<["-"], "nobuiltininc">, Flags<[CC1Option, CoreOption]>, HelpText<"Disable builtin #include directories">; def nocudainc : Flag<["-"], "nocudainc">; -def nocudalib : Flag<["-"], "nocudalib">; +def nogpulib : Flag<["-"], "nogpulib">, + HelpText<"Do not link device library for CUDA/HIP device compilation">; +def : Flag<["-"], "nocudalib">, Alias<nogpulib>; def nodefaultlibs : Flag<["-"], "nodefaultlibs">; def nofixprebinding : Flag<["-"], "nofixprebinding">; def nolibc : Flag<["-"], "nolibc">; Modified: cfe/trunk/lib/Driver/ToolChains/Cuda.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Cuda.cpp?rev=373649&r1=373648&r2=373649&view=diff == --- cfe/trunk/lib/Driver/ToolChains/Cuda.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/Cuda.cpp Thu Oct 3 11:59:56 2019 @@ -121,7 +121,7 @@ CudaInstallationDetector::CudaInstallati Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda"); } - bool NoCudaLib = Args.hasArg(options::OPT_nocudalib); + bool NoCudaLib = Args.hasArg(options::OPT_nogpulib); for (const auto &Candidate : Candidates) { InstallPath = Candidate.Path; @@ -628,7 +628,7 @@ void CudaToolChain::addClangTargetOption 
CC1Args.push_back("-fgpu-rdc"); } - if (DriverArgs.hasArg(options::OPT_nocudalib)) + if (DriverArgs.hasArg(options::OPT_nogpulib)) return; std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch); Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/HIP.cpp?rev=373649&r1=373648&r2=373649&view=diff == --- cfe/trunk/lib/Driver/ToolChains/HIP.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/HIP.cpp Thu Oct 3 11:59:56 2019 @@ -286,6 +286,9 @@ void HIPToolChain::addClangTargetOptions CC1Args.append({"-fvisibility", "hidden"}); CC1Args.push_back("-fapply-global-visibility-to-externs"); } + + if (DriverArgs.hasArg(options::OPT_nogpulib)) +return; ArgStringList LibraryPaths; // Find in --hip-device-lib-path and HIP_LIBRARY_PATH. Added: cfe/trunk/test/Driver/hip-no-device-libs.hip URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-no-device-libs.hip?rev=373649&view=auto == --- cfe/trunk/test/Driver/hip-no-device-libs.hip (added) +++ cfe/trunk/test/Driver/hip-no-device-libs.hip Thu Oct 3 11:59:56 2019 @@ -0,0 +1,11 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -nogpulib -target x86_64-linux-gnu \ +// RUN: -x hip --cuda-gpu-arch=gfx900 \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s + +// CHECK-NOT: "-mlink-builtin-bitcode" + ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r330279 - [HIP] Add driver input type for HIP
Author: yaxunl Date: Wed Apr 18 11:25:03 2018 New Revision: 330279 URL: http://llvm.org/viewvc/llvm-project?rev=330279&view=rev Log: [HIP] Add driver input type for HIP Patch by Greg Rodgers. Revised by Yaxun Liu. Differential Revision: https://reviews.llvm.org/D45489 Modified: cfe/trunk/include/clang/Driver/Types.def cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/lib/Driver/Types.cpp Modified: cfe/trunk/include/clang/Driver/Types.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Types.def?rev=330279&r1=330278&r2=330279&view=diff == --- cfe/trunk/include/clang/Driver/Types.def (original) +++ cfe/trunk/include/clang/Driver/Types.def Wed Apr 18 11:25:03 2018 @@ -46,6 +46,9 @@ TYPE("cl", CL, TYPE("cuda-cpp-output", PP_CUDA, INVALID, "cui", "u") TYPE("cuda", CUDA, PP_CUDA, "cu","u") TYPE("cuda", CUDA_DEVICE, PP_CUDA, "cu","") +TYPE("hip-cpp-output", PP_HIP, INVALID, "cui", "u") +TYPE("hip", HIP, PP_HIP, "cu","u") +TYPE("hip", HIP_DEVICE, PP_HIP, "cu","") TYPE("objective-c-cpp-output", PP_ObjC, INVALID, "mi","u") TYPE("objc-cpp-output", PP_ObjC_Alias, INVALID,"mi","u") TYPE("objective-c", ObjC, PP_ObjC, "m", "u") Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=330279&r1=330278&r2=330279&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Wed Apr 18 11:25:03 2018 @@ -2257,9 +2257,10 @@ class OffloadingActionBuilder final { assert(!GpuArchList.empty() && "We should have at least one GPU architecture."); -// If the host input is not CUDA, we don't need to bother about this -// input. -if (IA->getType() != types::TY_CUDA) { +// If the host input is not CUDA or HIP, we don't need to bother about +// this input. +if (IA->getType() != types::TY_CUDA && +IA->getType() != types::TY_HIP) { // The builder will ignore this input. 
IsActive = false; return ABRT_Inactive; @@ -2272,9 +2273,12 @@ class OffloadingActionBuilder final { return ABRT_Success; // Replicate inputs for each GPU architecture. -for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) - CudaDeviceActions.push_back(C.MakeAction<InputAction>( - IA->getInputArg(), types::TY_CUDA_DEVICE)); +auto Ty = IA->getType() == types::TY_HIP ? types::TY_HIP_DEVICE + : types::TY_CUDA_DEVICE; +for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { + CudaDeviceActions.push_back( + C.MakeAction<InputAction>(IA->getInputArg(), Ty)); +} return ABRT_Success; } Modified: cfe/trunk/lib/Driver/Types.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Types.cpp?rev=330279&r1=330278&r2=330279&view=diff == --- cfe/trunk/lib/Driver/Types.cpp (original) +++ cfe/trunk/lib/Driver/Types.cpp Wed Apr 18 11:25:03 2018 @@ -102,6 +102,9 @@ bool types::isAcceptedByClang(ID Id) { case TY_CL: case TY_CUDA: case TY_PP_CUDA: case TY_CUDA_DEVICE: + case TY_HIP: + case TY_PP_HIP: + case TY_HIP_DEVICE: case TY_ObjC: case TY_PP_ObjC: case TY_PP_ObjC_Alias: case TY_CXX: case TY_PP_CXX: case TY_ObjCXX: case TY_PP_ObjCXX: case TY_PP_ObjCXX_Alias: @@ -141,6 +144,9 @@ bool types::isCXX(ID Id) { case TY_ObjCXXHeader: case TY_PP_ObjCXXHeader: case TY_CXXModule: case TY_PP_CXXModule: case TY_CUDA: case TY_PP_CUDA: case TY_CUDA_DEVICE: + case TY_HIP: + case TY_PP_HIP: + case TY_HIP_DEVICE: return true; } } @@ -166,6 +172,9 @@ bool types::isCuda(ID Id) { case TY_CUDA: case TY_PP_CUDA: case TY_CUDA_DEVICE: + case TY_HIP: + case TY_PP_HIP: + case TY_HIP_DEVICE: return true; } } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r330447 - [CUDA] Set LLVM calling convention for CUDA kernel
Author: yaxunl Date: Fri Apr 20 10:01:03 2018 New Revision: 330447 URL: http://llvm.org/viewvc/llvm-project?rev=330447&view=rev Log: [CUDA] Set LLVM calling convention for CUDA kernel Some targets need special LLVM calling convention for CUDA kernel. This patch does that through a TargetCodeGenInfo hook. It only affects amdgcn target. Patch by Greg Rodgers. Revised and lit tests added by Yaxun Liu. Differential Revision: https://reviews.llvm.org/D45223 Added: cfe/trunk/test/CodeGenCUDA/kernel-amdgcn.cu Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp cfe/trunk/lib/CodeGen/TargetInfo.cpp cfe/trunk/lib/CodeGen/TargetInfo.h Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=330447&r1=330446&r2=330447&view=diff == --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original) +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Fri Apr 20 10:01:03 2018 @@ -3627,6 +3627,9 @@ void CodeGenModule::EmitGlobalFunctionDe MaybeHandleStaticInExternC(D, Fn); + if (D->hasAttr<CUDAGlobalAttr>()) +getTargetCodeGenInfo().setCUDAKernelCallingConvention(Fn); + maybeSetTrivialComdat(*D, *Fn); CodeGenFunction(*this).GenerateCode(D, Fn, FI); Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=330447&r1=330446&r2=330447&view=diff == --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original) +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Fri Apr 20 10:01:03 2018 @@ -7637,6 +7637,7 @@ public: llvm::Function *BlockInvokeFunc, llvm::Value *BlockLiteral) const override; bool shouldEmitStaticExternCAliases() const override; + void setCUDAKernelCallingConvention(llvm::Function *F) const override; }; } @@ -7772,6 +7773,11 @@ bool AMDGPUTargetCodeGenInfo::shouldEmit return false; } +void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention( +llvm::Function *F) const { + F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); +} + //===--===// // SPARC v8 ABI Implementation. 
// Based on the SPARC Compliance Definition version 2.4.1. Modified: cfe/trunk/lib/CodeGen/TargetInfo.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.h?rev=330447&r1=330446&r2=330447&view=diff == --- cfe/trunk/lib/CodeGen/TargetInfo.h (original) +++ cfe/trunk/lib/CodeGen/TargetInfo.h Fri Apr 20 10:01:03 2018 @@ -301,6 +301,8 @@ public: /// mangled name of functions declared within an extern "C" region and marked /// as 'used', and having internal linkage. virtual bool shouldEmitStaticExternCAliases() const { return true; } + + virtual void setCUDAKernelCallingConvention(llvm::Function *F) const {} }; } // namespace CodeGen Added: cfe/trunk/test/CodeGenCUDA/kernel-amdgcn.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/kernel-amdgcn.cu?rev=330447&view=auto == --- cfe/trunk/test/CodeGenCUDA/kernel-amdgcn.cu (added) +++ cfe/trunk/test/CodeGenCUDA/kernel-amdgcn.cu Fri Apr 20 10:01:03 2018 @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s +#include "Inputs/cuda.h" + +// CHECK: define amdgpu_kernel void @_ZN1A6kernelEv +class A { +public: + static __global__ void kernel(){} +}; + +// CHECK: define void @_Z10non_kernelv +__device__ void non_kernel(){} + +// CHECK: define amdgpu_kernel void @_Z6kerneli +__global__ void kernel(int x) { + non_kernel(); +} + +// CHECK: define amdgpu_kernel void @_Z11EmptyKernelIvEvv +template <typename T> +__global__ void EmptyKernel(void) {} + +struct Dummy { + /// Type definition of the EmptyKernel kernel entry point + typedef void (*EmptyKernelPtr)(); + EmptyKernelPtr Empty() { return EmptyKernel<void>; } +}; + +// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_ +template <typename T> +__global__ void template_kernel(T x) {} + +void launch(void *f); + +int main() { + Dummy D; + launch((void*)A::kernel); + launch((void*)kernel); + launch((void*)template_kernel<A>); + launch((void*)D.Empty()); + return 0; +} ___ cfe-commits mailing list 
cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits