yaxunl created this revision. yaxunl added a reviewer: tra. Herald added subscribers: kerbowa, tpr, nhaehnle, jvesely. Herald added a reviewer: aaron.ballman. yaxunl requested review of this revision.
AMDGPU does not have a common processor (GPU arch). A HIP kernel must be compiled with a specified processor to be able to be launched on that processor. However we cannot simply diagnose missing --offload-arch in clang driver since a valid HIP program can contain no kernel, which can be compiled without specifying offload arch and executed on machines without AMDGPU. Therefore only HIP programs containing kernels should be diagnosed when compiled without offload arch. This patch changes clang driver so that when offload arch is not specified for HIP, no target CPU is specified for clang -cc1. If HIP program contains kernel, FE will diagnose it as a fatal error so that the diagnostics will be emitted only once. This way, we allow HIP programs without kernels to be compiled without offload arch whereas forbid HIP programs with kernels to be compiled without offload arch. https://reviews.llvm.org/D100552 Files: clang/include/clang/Basic/DiagnosticSemaKinds.td clang/lib/Driver/ToolChains/HIP.cpp clang/lib/Sema/SemaDeclAttr.cpp clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu clang/test/CodeGenCUDA/kernel-amdgcn.cu clang/test/CodeGenCUDA/kernel-args.cu clang/test/CodeGenCUDA/kernel-dbg-info.cu clang/test/CodeGenCUDA/lambda-reference-var.cu clang/test/CodeGenCUDA/lambda.cu clang/test/CodeGenCUDA/managed-var.cu clang/test/CodeGenCUDA/norecurse.cu clang/test/CodeGenCUDA/static-device-var-no-rdc.cu clang/test/CodeGenCUDA/static-device-var-rdc.cu clang/test/CodeGenCUDA/unnamed-types.cu clang/test/Driver/cuda-flush-denormals-to-zero.cu clang/test/Driver/hip-default-gpu-arch.hip clang/test/SemaCUDA/kernel-no-gpu.cu
Index: clang/test/SemaCUDA/kernel-no-gpu.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/kernel-no-gpu.cu @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -fcuda-is-device -verify=hip -x hip %s +// RUN: %clang_cc1 -fcuda-is-device -verify=cuda %s +// cuda-no-diagnostics + +#include "Inputs/cuda.h" + +__global__ void kern1() {} +// hip-error@-1 {{compile HIP kernel without specifying offload arch is not allowed}} + +// Make sure the error is emitted once. +__global__ void kern2() {} Index: clang/test/Driver/hip-default-gpu-arch.hip =================================================================== --- clang/test/Driver/hip-default-gpu-arch.hip +++ clang/test/Driver/hip-default-gpu-arch.hip @@ -4,4 +4,5 @@ // RUN: %clang -### -c %s 2>&1 | FileCheck %s -// CHECK: {{.*}}clang{{.*}}"-target-cpu" "gfx803" +// CHECK-NOT: {{.*}}clang{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" +// CHECK: {{.*}}clang{{.*}}"-triple" "amdgcn-amd-amdhsa" Index: clang/test/Driver/cuda-flush-denormals-to-zero.cu =================================================================== --- clang/test/Driver/cuda-flush-denormals-to-zero.cu +++ clang/test/Driver/cuda-flush-denormals-to-zero.cu @@ -26,8 +26,11 @@ // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s -// Test no subtarget, which should get the denormal setting of the default gfx803 -// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s +// Test no subtarget, which should get the denormal setting of the default +// CPU of AMDGPU target, which is 'none'. +// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c \ +// RUN: -march=haswell -nocudainc -nogpulib %s 2>&1 \ +// RUN: | FileCheck -check-prefix=NOFTZ %s // Test multiple offload archs with different defaults. // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=MIXED-DEFAULT-MODE %s Index: clang/test/CodeGenCUDA/unnamed-types.cu =================================================================== --- clang/test/CodeGenCUDA/unnamed-types.cu +++ clang/test/CodeGenCUDA/unnamed-types.cu @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-pc-windows-msvc -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=MSVC -// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE +// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx906 -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE #include "Inputs/cuda.h" Index: clang/test/CodeGenCUDA/static-device-var-rdc.cu =================================================================== --- clang/test/CodeGenCUDA/static-device-var-rdc.cu +++ clang/test/CodeGenCUDA/static-device-var-rdc.cu @@ -2,7 +2,7 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ -// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -target-cpu gfx906 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ // RUN: -check-prefixes=DEV,INT-DEV %s // RUN: %clang_cc1 -triple x86_64-gnu-linux \ @@ -10,7 +10,7 @@ // RUN: -check-prefixes=HOST,INT-HOST %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \ -// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev +// RUN: -target-cpu gfx906 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev // RUN: cat %t.dev | FileCheck -check-prefixes=DEV,EXT-DEV %s // RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ Index: clang/test/CodeGenCUDA/static-device-var-no-rdc.cu =================================================================== --- clang/test/CodeGenCUDA/static-device-var-no-rdc.cu +++ clang/test/CodeGenCUDA/static-device-var-no-rdc.cu @@ -2,7 +2,7 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \ -// RUN: -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -target-cpu gfx906 -emit-llvm -o - -x hip %s | FileCheck \ // RUN: -check-prefixes=DEV %s // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \ Index: clang/test/CodeGenCUDA/norecurse.cu =================================================================== --- clang/test/CodeGenCUDA/norecurse.cu +++ clang/test/CodeGenCUDA/norecurse.cu @@ -5,7 +5,8 @@ // RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ -// RUN: -emit-llvm -disable-llvm-passes -o - -x hip %s | FileCheck %s +// RUN: -target-cpu gfx906 -emit-llvm -disable-llvm-passes \ +// RUN: -o - -x hip %s | FileCheck %s #include "Inputs/cuda.h" Index: clang/test/CodeGenCUDA/managed-var.cu =================================================================== --- clang/test/CodeGenCUDA/managed-var.cu +++ clang/test/CodeGenCUDA/managed-var.cu @@ -1,11 +1,11 @@ // REQUIRES: x86-registered-target, amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \ -// RUN: -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -target-cpu gfx906 -emit-llvm -o - -x hip %s | FileCheck \ // RUN: -check-prefixes=COMMON,DEV,NORDC-D %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \ -// RUN: -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s > %t.dev +// RUN: -target-cpu gfx906 -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s > %t.dev // RUN: cat %t.dev | FileCheck -check-prefixes=COMMON,DEV,RDC-D %s // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \ Index: clang/test/CodeGenCUDA/lambda.cu =================================================================== --- clang/test/CodeGenCUDA/lambda.cu +++ clang/test/CodeGenCUDA/lambda.cu @@ -3,7 +3,7 @@ // RUN: | FileCheck -check-prefix=HOST %s // RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ // RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \ -// RUN: | FileCheck -check-prefix=DEV %s +// RUN: -target-cpu gfx906 | FileCheck -check-prefix=DEV %s #include "Inputs/cuda.h" Index: clang/test/CodeGenCUDA/lambda-reference-var.cu =================================================================== --- clang/test/CodeGenCUDA/lambda-reference-var.cu +++ clang/test/CodeGenCUDA/lambda-reference-var.cu @@ -3,7 +3,7 @@ // RUN: | FileCheck -check-prefix=HOST %s // RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ // RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \ -// RUN: | FileCheck -check-prefix=DEV %s +// RUN: -target-cpu gfx906 | FileCheck -check-prefix=DEV %s #include "Inputs/cuda.h" Index: clang/test/CodeGenCUDA/kernel-dbg-info.cu =================================================================== --- clang/test/CodeGenCUDA/kernel-dbg-info.cu +++ clang/test/CodeGenCUDA/kernel-dbg-info.cu @@ -5,7 +5,8 @@ // RUN: -o - -x hip | FileCheck -check-prefixes=CHECK,O0 %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O0 \ // RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \ -// RUN: -o - -x hip -fcuda-is-device | FileCheck -check-prefix=DEV %s +// RUN: -target-cpu gfx906 -o - -x hip -fcuda-is-device \ +// RUN: | FileCheck -check-prefix=DEV %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -O0 \ // RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \ @@ -14,7 +15,8 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O0 \ // RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \ // RUN: -o - -x hip -debugger-tuning=gdb -dwarf-version=4 \ -// RUN: -fcuda-is-device | FileCheck -check-prefix=DEV %s +// RUN: -target-cpu gfx906 -fcuda-is-device \ +// RUN: | FileCheck -check-prefix=DEV %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -O3 \ // RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \ @@ -22,7 +24,8 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O3 \ // RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \ // RUN: -o - -x hip -debugger-tuning=gdb -dwarf-version=4 \ -// RUN: -fcuda-is-device | FileCheck -check-prefix=DEV %s +// RUN: -target-cpu gfx906 -fcuda-is-device \ +// RUN: | FileCheck -check-prefix=DEV %s #include "Inputs/cuda.h" Index: clang/test/CodeGenCUDA/kernel-args.cu =================================================================== --- clang/test/CodeGenCUDA/kernel-args.cu +++ clang/test/CodeGenCUDA/kernel-args.cu @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device \ -// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s +// RUN: -target-cpu gfx906 -emit-llvm %s -o - \ +// RUN: | FileCheck -check-prefix=AMDGCN %s // RUN: %clang_cc1 -x cuda -triple nvptx64-nvidia-cuda- -fcuda-is-device \ // RUN: -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s #include "Inputs/cuda.h" Index: clang/test/CodeGenCUDA/kernel-amdgcn.cu =================================================================== --- clang/test/CodeGenCUDA/kernel-amdgcn.cu +++ clang/test/CodeGenCUDA/kernel-amdgcn.cu @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx906 -fcuda-is-device \ +// RUN: -emit-llvm -x hip %s -o - | FileCheck %s #include "Inputs/cuda.h" // CHECK: define{{.*}} amdgpu_kernel void @_ZN1A6kernelEv Index: clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu =================================================================== --- clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu +++ clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \ // RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=CHECK,DEFAULT %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa --gpu-max-threads-per-block=1024 \ -// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ +// RUN: -target-cpu gfx906 -fcuda-is-device -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=CHECK,MAX1024 %s // RUN: %clang_cc1 -triple nvptx \ // RUN: -fcuda-is-device -emit-llvm -o - %s | FileCheck %s \ Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu =================================================================== --- clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu +++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -1,9 +1,15 @@ // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=COMMON,OPT -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \ +// RUN: -fcuda-is-device -emit-llvm -x hip %s -o - \ +// RUN: | FileCheck --check-prefixes=COMMON,CHECK %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \ +// RUN: -fcuda-is-device -emit-llvm -x hip %s \ +// RUN: -disable-O0-optnone -o - | opt -S -O2 \ +// RUN: | FileCheck %s --check-prefixes=COMMON,OPT +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \ +// RUN: -x hip %s -o - | FileCheck -check-prefix=HOST %s #include "Inputs/cuda.h" Index: clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu =================================================================== --- clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu +++ clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu @@ -1,5 +1,5 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -fcuda-is-device -emit-llvm -x hip -o - %s | FileCheck %s #include "Inputs/cuda.h" __global__ void hip_kernel_temp() { Index: clang/lib/Sema/SemaDeclAttr.cpp =================================================================== --- clang/lib/Sema/SemaDeclAttr.cpp +++ clang/lib/Sema/SemaDeclAttr.cpp @@ -4428,6 +4428,11 @@ } S.Diag(Method->getBeginLoc(), diag::warn_kern_is_method) << Method; } + if (S.getASTContext().getTargetInfo().getTargetOpts().CPU.empty() && + S.getLangOpts().HIP && S.getLangOpts().CUDAIsDevice) { + S.Diag(FD->getBeginLoc(), diag::err_hip_kern_without_gpu); + return; + } // Only warn for "inline" when compiling for host, to cut down on noise. if (FD->isInlineSpecified() && !S.getLangOpts().CUDAIsDevice) S.Diag(FD->getBeginLoc(), diag::warn_kern_is_inline) << FD; Index: clang/lib/Driver/ToolChains/HIP.cpp =================================================================== --- clang/lib/Driver/ToolChains/HIP.cpp +++ clang/lib/Driver/ToolChains/HIP.cpp @@ -293,8 +293,11 @@ if (!BoundArch.empty()) { DAL->eraseArg(options::OPT_mcpu_EQ); - DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch); - checkTargetID(*DAL); + if (BoundArch != "unknown") { + DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), + BoundArch); + checkTargetID(*DAL); + } } return DAL; @@ -355,7 +358,8 @@ llvm::SmallVector<std::string, 12> HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { llvm::SmallVector<std::string, 12> BCLibs; - if (DriverArgs.hasArg(options::OPT_nogpulib)) + StringRef GpuArch = getGPUArch(DriverArgs); + if (DriverArgs.hasArg(options::OPT_nogpulib) || GpuArch.empty()) return {}; ArgStringList LibraryPaths; @@ -386,7 +390,6 @@ getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0; return {}; } - StringRef GpuArch = getGPUArch(DriverArgs); assert(!GpuArch.empty() && "Must have an explicit GPU arch."); (void)GpuArch; auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); Index: clang/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticSemaKinds.td +++ clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8256,6 +8256,9 @@ def warn_kern_is_inline : Warning< "ignored 'inline' attribute on kernel function %0">, InGroup<CudaCompat>; +def err_hip_kern_without_gpu : Error< + "compile HIP kernel without specifying offload arch is not allowed">, + DefaultFatal; def err_variadic_device_fn : Error< "CUDA device code does not support variadic functions">; def err_va_arg_in_device : Error<
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits