yaxunl created this revision.
yaxunl added a reviewer: tra.
Herald added subscribers: kerbowa, tpr, nhaehnle, jvesely.
Herald added a reviewer: aaron.ballman.
yaxunl requested review of this revision.

AMDGPU does not have a common processor (GPU arch). A HIP kernel
must be compiled with a specified processor to be able to be launched
on that processor.

However we cannot simply diagnose missing --offload-arch in clang
driver since a valid HIP program can contain no kernel, which can be
compiled without specifying offload arch and executed on machines
without AMDGPU.

Therefore only HIP programs containing kernels should be diagnosed
when compiled without offload arch.

This patch changes clang driver so that when offload arch is not specified
for HIP, no target CPU is specified for clang -cc1. If HIP program contains
kernel, FE will diagnose it as a fatal error so that the diagnostics will be
emitted only once.  This way, we allow HIP programs without kernels to
be compiled without offload arch whereas forbid HIP programs with
kernels to be compiled without offload arch.


https://reviews.llvm.org/D100552

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Driver/ToolChains/HIP.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu
  clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
  clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
  clang/test/CodeGenCUDA/kernel-amdgcn.cu
  clang/test/CodeGenCUDA/kernel-args.cu
  clang/test/CodeGenCUDA/kernel-dbg-info.cu
  clang/test/CodeGenCUDA/lambda-reference-var.cu
  clang/test/CodeGenCUDA/lambda.cu
  clang/test/CodeGenCUDA/managed-var.cu
  clang/test/CodeGenCUDA/norecurse.cu
  clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
  clang/test/CodeGenCUDA/static-device-var-rdc.cu
  clang/test/CodeGenCUDA/unnamed-types.cu
  clang/test/Driver/cuda-flush-denormals-to-zero.cu
  clang/test/Driver/hip-default-gpu-arch.hip
  clang/test/SemaCUDA/kernel-no-gpu.cu

Index: clang/test/SemaCUDA/kernel-no-gpu.cu
===================================================================
--- /dev/null
+++ clang/test/SemaCUDA/kernel-no-gpu.cu
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -fcuda-is-device -verify=hip -x hip %s
+// RUN: %clang_cc1 -fcuda-is-device -verify=cuda %s
+// cuda-no-diagnostics
+
+#include "Inputs/cuda.h"
+
+__global__ void kern1() {}
+// hip-error@-1 {{compile HIP kernel without specifying offload arch is not allowed}}
+
+// Make sure the error is emitted once.
+__global__ void kern2() {}
Index: clang/test/Driver/hip-default-gpu-arch.hip
===================================================================
--- clang/test/Driver/hip-default-gpu-arch.hip
+++ clang/test/Driver/hip-default-gpu-arch.hip
@@ -4,4 +4,5 @@
 
 // RUN: %clang -### -c %s 2>&1 | FileCheck %s
 
-// CHECK: {{.*}}clang{{.*}}"-target-cpu" "gfx803"
+// CHECK-NOT: {{.*}}clang{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu"
+// CHECK: {{.*}}clang{{.*}}"-triple" "amdgcn-amd-amdhsa"
Index: clang/test/Driver/cuda-flush-denormals-to-zero.cu
===================================================================
--- clang/test/Driver/cuda-flush-denormals-to-zero.cu
+++ clang/test/Driver/cuda-flush-denormals-to-zero.cu
@@ -26,8 +26,11 @@
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
-// Test no subtarget, which should get the denormal setting of the default gfx803
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// Test no subtarget, which should get the denormal setting of the default
+// CPU of AMDGPU target, which is 'none'.
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c \
+// RUN:   -march=haswell -nocudainc -nogpulib %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=NOFTZ %s
 
 // Test multiple offload archs with different defaults.
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=MIXED-DEFAULT-MODE %s
Index: clang/test/CodeGenCUDA/unnamed-types.cu
===================================================================
--- clang/test/CodeGenCUDA/unnamed-types.cu
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
 // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-pc-windows-msvc -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=MSVC
-// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
+// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx906 -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
 
 #include "Inputs/cuda.h"
 
Index: clang/test/CodeGenCUDA/static-device-var-rdc.cu
===================================================================
--- clang/test/CodeGenCUDA/static-device-var-rdc.cu
+++ clang/test/CodeGenCUDA/static-device-var-rdc.cu
@@ -2,7 +2,7 @@
 // REQUIRES: amdgpu-registered-target
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
-// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -target-cpu gfx906 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
 // RUN:   -check-prefixes=DEV,INT-DEV %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux \
@@ -10,7 +10,7 @@
 // RUN:   -check-prefixes=HOST,INT-HOST %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
-// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev
+// RUN:   -target-cpu gfx906 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev
 // RUN: cat %t.dev | FileCheck -check-prefixes=DEV,EXT-DEV %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
Index: clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
===================================================================
--- clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
+++ clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
@@ -2,7 +2,7 @@
 // REQUIRES: amdgpu-registered-target
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
-// RUN:   -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -target-cpu gfx906 -emit-llvm -o - -x hip %s | FileCheck \
 // RUN:   -check-prefixes=DEV %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
Index: clang/test/CodeGenCUDA/norecurse.cu
===================================================================
--- clang/test/CodeGenCUDA/norecurse.cu
+++ clang/test/CodeGenCUDA/norecurse.cu
@@ -5,7 +5,8 @@
 // RUN:     -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
-// RUN:     -emit-llvm -disable-llvm-passes -o - -x hip %s | FileCheck %s
+// RUN:     -target-cpu gfx906 -emit-llvm -disable-llvm-passes \
+// RUN:     -o - -x hip %s | FileCheck %s
 
 #include "Inputs/cuda.h"
 
Index: clang/test/CodeGenCUDA/managed-var.cu
===================================================================
--- clang/test/CodeGenCUDA/managed-var.cu
+++ clang/test/CodeGenCUDA/managed-var.cu
@@ -1,11 +1,11 @@
 // REQUIRES: x86-registered-target, amdgpu-registered-target
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
-// RUN:   -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -target-cpu gfx906 -emit-llvm -o - -x hip %s | FileCheck \
 // RUN:   -check-prefixes=COMMON,DEV,NORDC-D %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
-// RUN:   -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s > %t.dev
+// RUN:   -target-cpu gfx906 -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s > %t.dev
 // RUN: cat %t.dev | FileCheck -check-prefixes=COMMON,DEV,RDC-D %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
Index: clang/test/CodeGenCUDA/lambda.cu
===================================================================
--- clang/test/CodeGenCUDA/lambda.cu
+++ clang/test/CodeGenCUDA/lambda.cu
@@ -3,7 +3,7 @@
 // RUN:   | FileCheck -check-prefix=HOST %s
 // RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
 // RUN:   -triple amdgcn-amd-amdhsa -fcuda-is-device \
-// RUN:   | FileCheck -check-prefix=DEV %s
+// RUN:   -target-cpu gfx906 | FileCheck -check-prefix=DEV %s
 
 #include "Inputs/cuda.h"
 
Index: clang/test/CodeGenCUDA/lambda-reference-var.cu
===================================================================
--- clang/test/CodeGenCUDA/lambda-reference-var.cu
+++ clang/test/CodeGenCUDA/lambda-reference-var.cu
@@ -3,7 +3,7 @@
 // RUN:   | FileCheck -check-prefix=HOST %s
 // RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
 // RUN:   -triple amdgcn-amd-amdhsa -fcuda-is-device \
-// RUN:   | FileCheck -check-prefix=DEV %s
+// RUN:   -target-cpu gfx906 | FileCheck -check-prefix=DEV %s
 
 #include "Inputs/cuda.h"
 
Index: clang/test/CodeGenCUDA/kernel-dbg-info.cu
===================================================================
--- clang/test/CodeGenCUDA/kernel-dbg-info.cu
+++ clang/test/CodeGenCUDA/kernel-dbg-info.cu
@@ -5,7 +5,8 @@
 // RUN:   -o - -x hip | FileCheck -check-prefixes=CHECK,O0 %s
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O0 \
 // RUN:   -fcuda-include-gpubinary %t -debug-info-kind=limited \
-// RUN:   -o - -x hip -fcuda-is-device | FileCheck -check-prefix=DEV %s
+// RUN:   -target-cpu gfx906 -o - -x hip -fcuda-is-device \
+// RUN:   | FileCheck -check-prefix=DEV %s
 
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -O0 \
 // RUN:   -fcuda-include-gpubinary %t -debug-info-kind=limited \
@@ -14,7 +15,8 @@
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O0 \
 // RUN:   -fcuda-include-gpubinary %t -debug-info-kind=limited \
 // RUN:   -o - -x hip -debugger-tuning=gdb -dwarf-version=4 \
-// RUN:   -fcuda-is-device | FileCheck -check-prefix=DEV %s
+// RUN:   -target-cpu gfx906 -fcuda-is-device \
+// RUN:   | FileCheck -check-prefix=DEV %s
 
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -O3 \
 // RUN:   -fcuda-include-gpubinary %t -debug-info-kind=limited \
@@ -22,7 +24,8 @@
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O3 \
 // RUN:   -fcuda-include-gpubinary %t -debug-info-kind=limited \
 // RUN:   -o - -x hip -debugger-tuning=gdb -dwarf-version=4 \
-// RUN:   -fcuda-is-device | FileCheck -check-prefix=DEV %s
+// RUN:   -target-cpu gfx906 -fcuda-is-device \
+// RUN:   | FileCheck -check-prefix=DEV %s
 
 #include "Inputs/cuda.h"
 
Index: clang/test/CodeGenCUDA/kernel-args.cu
===================================================================
--- clang/test/CodeGenCUDA/kernel-args.cu
+++ clang/test/CodeGenCUDA/kernel-args.cu
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device \
-// RUN:     -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s
+// RUN:     -target-cpu gfx906 -emit-llvm %s -o - \
+// RUN:     | FileCheck -check-prefix=AMDGCN %s
 // RUN: %clang_cc1 -x cuda -triple nvptx64-nvidia-cuda- -fcuda-is-device \
 // RUN:     -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s
 #include "Inputs/cuda.h"
Index: clang/test/CodeGenCUDA/kernel-amdgcn.cu
===================================================================
--- clang/test/CodeGenCUDA/kernel-amdgcn.cu
+++ clang/test/CodeGenCUDA/kernel-amdgcn.cu
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx906 -fcuda-is-device \
+// RUN:    -emit-llvm -x hip %s -o - | FileCheck %s
 #include "Inputs/cuda.h"
 
 // CHECK: define{{.*}} amdgpu_kernel void @_ZN1A6kernelEv
Index: clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
===================================================================
--- clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
+++ clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \
 // RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefixes=CHECK,DEFAULT %s
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa --gpu-max-threads-per-block=1024 \
-// RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
+// RUN:     -target-cpu gfx906 -fcuda-is-device -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefixes=CHECK,MAX1024 %s
 // RUN: %clang_cc1 -triple nvptx \
 // RUN:     -fcuda-is-device -emit-llvm -o - %s | FileCheck %s \
Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
===================================================================
--- clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
+++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -1,9 +1,15 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=COMMON,OPT
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \
+// RUN:   -fcuda-is-device -emit-llvm -x hip %s -o - \
+// RUN:   | FileCheck --check-prefixes=COMMON,CHECK %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \
+// RUN:   -fcuda-is-device -emit-llvm -x hip %s \
+// RUN:   -disable-O0-optnone -o - | opt -S -O2 \
+// RUN:   | FileCheck %s --check-prefixes=COMMON,OPT
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN:   -x hip %s -o - | FileCheck -check-prefix=HOST %s
 
 #include "Inputs/cuda.h"
 
Index: clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu
===================================================================
--- clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu
+++ clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu
@@ -1,5 +1,5 @@
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -fcuda-is-device -emit-llvm -x hip -o - %s | FileCheck %s
 #include "Inputs/cuda.h"
 
 __global__ void hip_kernel_temp() {
Index: clang/lib/Sema/SemaDeclAttr.cpp
===================================================================
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -4428,6 +4428,11 @@
     }
     S.Diag(Method->getBeginLoc(), diag::warn_kern_is_method) << Method;
   }
+  if (S.getASTContext().getTargetInfo().getTargetOpts().CPU.empty() &&
+      S.getLangOpts().HIP && S.getLangOpts().CUDAIsDevice) {
+    S.Diag(FD->getBeginLoc(), diag::err_hip_kern_without_gpu);
+    return;
+  }
   // Only warn for "inline" when compiling for host, to cut down on noise.
   if (FD->isInlineSpecified() && !S.getLangOpts().CUDAIsDevice)
     S.Diag(FD->getBeginLoc(), diag::warn_kern_is_inline) << FD;
Index: clang/lib/Driver/ToolChains/HIP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/HIP.cpp
+++ clang/lib/Driver/ToolChains/HIP.cpp
@@ -293,8 +293,11 @@
 
   if (!BoundArch.empty()) {
     DAL->eraseArg(options::OPT_mcpu_EQ);
-    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch);
-    checkTargetID(*DAL);
+    if (BoundArch != "unknown") {
+      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ),
+                        BoundArch);
+      checkTargetID(*DAL);
+    }
   }
 
   return DAL;
@@ -355,7 +358,8 @@
 llvm::SmallVector<std::string, 12>
 HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
   llvm::SmallVector<std::string, 12> BCLibs;
-  if (DriverArgs.hasArg(options::OPT_nogpulib))
+  StringRef GpuArch = getGPUArch(DriverArgs);
+  if (DriverArgs.hasArg(options::OPT_nogpulib) || GpuArch.empty())
     return {};
   ArgStringList LibraryPaths;
 
@@ -386,7 +390,6 @@
       getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
       return {};
     }
-    StringRef GpuArch = getGPUArch(DriverArgs);
     assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
     (void)GpuArch;
     auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8256,6 +8256,9 @@
 def warn_kern_is_inline : Warning<
   "ignored 'inline' attribute on kernel function %0">,
   InGroup<CudaCompat>;
+def err_hip_kern_without_gpu : Error<
+  "compile HIP kernel without specifying offload arch is not allowed">,
+  DefaultFatal;
 def err_variadic_device_fn : Error<
   "CUDA device code does not support variadic functions">;
 def err_va_arg_in_device : Error<
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to