yaxunl updated this revision to Diff 337883.
yaxunl marked an inline comment as done.
yaxunl added a comment.

revised error msg by Aaron's comments


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100552/new/

https://reviews.llvm.org/D100552

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Driver/ToolChains/HIP.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu
  clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
  clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
  clang/test/CodeGenCUDA/kernel-amdgcn.cu
  clang/test/CodeGenCUDA/kernel-args.cu
  clang/test/CodeGenCUDA/kernel-dbg-info.cu
  clang/test/CodeGenCUDA/lambda-reference-var.cu
  clang/test/CodeGenCUDA/lambda.cu
  clang/test/CodeGenCUDA/managed-var.cu
  clang/test/CodeGenCUDA/norecurse.cu
  clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
  clang/test/CodeGenCUDA/static-device-var-rdc.cu
  clang/test/CodeGenCUDA/unnamed-types.cu
  clang/test/Driver/cuda-flush-denormals-to-zero.cu
  clang/test/Driver/hip-default-gpu-arch.hip
  clang/test/SemaCUDA/kernel-no-gpu.cu

Index: clang/test/SemaCUDA/kernel-no-gpu.cu
===================================================================
--- /dev/null
+++ clang/test/SemaCUDA/kernel-no-gpu.cu
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -fcuda-is-device -verify=hip -x hip %s
+// RUN: %clang_cc1 -fcuda-is-device -verify=cuda %s
+// cuda-no-diagnostics
+
+#include "Inputs/cuda.h"
+
+__global__ void kern1() {}
+// hip-error@-1 {{compiling a HIP kernel without specifying an offload arch is not allowed}}
+
+// Make sure the error is emitted once.
+__global__ void kern2() {}
Index: clang/test/Driver/hip-default-gpu-arch.hip
===================================================================
--- clang/test/Driver/hip-default-gpu-arch.hip
+++ clang/test/Driver/hip-default-gpu-arch.hip
@@ -4,4 +4,5 @@
 
 // RUN: %clang -### -c %s 2>&1 | FileCheck %s
 
-// CHECK: {{.*}}clang{{.*}}"-target-cpu" "gfx803"
+// CHECK-NOT: {{.*}}clang{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu"
+// CHECK: {{.*}}clang{{.*}}"-triple" "amdgcn-amd-amdhsa"
Index: clang/test/Driver/cuda-flush-denormals-to-zero.cu
===================================================================
--- clang/test/Driver/cuda-flush-denormals-to-zero.cu
+++ clang/test/Driver/cuda-flush-denormals-to-zero.cu
@@ -26,8 +26,11 @@
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
-// Test no subtarget, which should get the denormal setting of the default gfx803
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// Test no subtarget, which should get the denormal setting of the default
+// CPU of AMDGPU target, which is 'none'.
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c \
+// RUN:   -march=haswell -nocudainc -nogpulib %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=NOFTZ %s
 
 // Test multiple offload archs with different defaults.
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=MIXED-DEFAULT-MODE %s
Index: clang/test/CodeGenCUDA/unnamed-types.cu
===================================================================
--- clang/test/CodeGenCUDA/unnamed-types.cu
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
 // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-pc-windows-msvc -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=MSVC
-// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
+// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx906 -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
 
 #include "Inputs/cuda.h"
 
Index: clang/test/CodeGenCUDA/static-device-var-rdc.cu
===================================================================
--- clang/test/CodeGenCUDA/static-device-var-rdc.cu
+++ clang/test/CodeGenCUDA/static-device-var-rdc.cu
@@ -2,7 +2,7 @@
 // REQUIRES: amdgpu-registered-target
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
-// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -target-cpu gfx906 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
 // RUN:   -check-prefixes=DEV,INT-DEV %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux \
@@ -10,7 +10,7 @@
 // RUN:   -check-prefixes=HOST,INT-HOST %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
-// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev
+// RUN:   -target-cpu gfx906 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev
 // RUN: cat %t.dev | FileCheck -check-prefixes=DEV,EXT-DEV %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
Index: clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
===================================================================
--- clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
+++ clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
@@ -2,7 +2,7 @@
 // REQUIRES: amdgpu-registered-target
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
-// RUN:   -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -target-cpu gfx906 -emit-llvm -o - -x hip %s | FileCheck \
 // RUN:   -check-prefixes=DEV %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
Index: clang/test/CodeGenCUDA/norecurse.cu
===================================================================
--- clang/test/CodeGenCUDA/norecurse.cu
+++ clang/test/CodeGenCUDA/norecurse.cu
@@ -5,7 +5,8 @@
 // RUN:     -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
-// RUN:     -emit-llvm -disable-llvm-passes -o - -x hip %s | FileCheck %s
+// RUN:     -target-cpu gfx906 -emit-llvm -disable-llvm-passes \
+// RUN:     -o - -x hip %s | FileCheck %s
 
 #include "Inputs/cuda.h"
 
Index: clang/test/CodeGenCUDA/managed-var.cu
===================================================================
--- clang/test/CodeGenCUDA/managed-var.cu
+++ clang/test/CodeGenCUDA/managed-var.cu
@@ -1,11 +1,11 @@
 // REQUIRES: x86-registered-target, amdgpu-registered-target
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
-// RUN:   -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -target-cpu gfx906 -emit-llvm -o - -x hip %s | FileCheck \
 // RUN:   -check-prefixes=COMMON,DEV,NORDC-D %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
-// RUN:   -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s > %t.dev
+// RUN:   -target-cpu gfx906 -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s > %t.dev
 // RUN: cat %t.dev | FileCheck -check-prefixes=COMMON,DEV,RDC-D %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
Index: clang/test/CodeGenCUDA/lambda.cu
===================================================================
--- clang/test/CodeGenCUDA/lambda.cu
+++ clang/test/CodeGenCUDA/lambda.cu
@@ -3,7 +3,7 @@
 // RUN:   | FileCheck -check-prefix=HOST %s
 // RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
 // RUN:   -triple amdgcn-amd-amdhsa -fcuda-is-device \
-// RUN:   | FileCheck -check-prefix=DEV %s
+// RUN:   -target-cpu gfx906 | FileCheck -check-prefix=DEV %s
 
 #include "Inputs/cuda.h"
 
Index: clang/test/CodeGenCUDA/lambda-reference-var.cu
===================================================================
--- clang/test/CodeGenCUDA/lambda-reference-var.cu
+++ clang/test/CodeGenCUDA/lambda-reference-var.cu
@@ -3,7 +3,7 @@
 // RUN:   | FileCheck -check-prefix=HOST %s
 // RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
 // RUN:   -triple amdgcn-amd-amdhsa -fcuda-is-device \
-// RUN:   | FileCheck -check-prefix=DEV %s
+// RUN:   -target-cpu gfx906 | FileCheck -check-prefix=DEV %s
 
 #include "Inputs/cuda.h"
 
Index: clang/test/CodeGenCUDA/kernel-dbg-info.cu
===================================================================
--- clang/test/CodeGenCUDA/kernel-dbg-info.cu
+++ clang/test/CodeGenCUDA/kernel-dbg-info.cu
@@ -5,7 +5,8 @@
 // RUN:   -o - -x hip | FileCheck -check-prefixes=CHECK,O0 %s
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O0 \
 // RUN:   -fcuda-include-gpubinary %t -debug-info-kind=limited \
-// RUN:   -o - -x hip -fcuda-is-device | FileCheck -check-prefix=DEV %s
+// RUN:   -target-cpu gfx906 -o - -x hip -fcuda-is-device \
+// RUN:   | FileCheck -check-prefix=DEV %s
 
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -O0 \
 // RUN:   -fcuda-include-gpubinary %t -debug-info-kind=limited \
@@ -14,7 +15,8 @@
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O0 \
 // RUN:   -fcuda-include-gpubinary %t -debug-info-kind=limited \
 // RUN:   -o - -x hip -debugger-tuning=gdb -dwarf-version=4 \
-// RUN:   -fcuda-is-device | FileCheck -check-prefix=DEV %s
+// RUN:   -target-cpu gfx906 -fcuda-is-device \
+// RUN:   | FileCheck -check-prefix=DEV %s
 
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -O3 \
 // RUN:   -fcuda-include-gpubinary %t -debug-info-kind=limited \
@@ -22,7 +24,8 @@
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O3 \
 // RUN:   -fcuda-include-gpubinary %t -debug-info-kind=limited \
 // RUN:   -o - -x hip -debugger-tuning=gdb -dwarf-version=4 \
-// RUN:   -fcuda-is-device | FileCheck -check-prefix=DEV %s
+// RUN:   -target-cpu gfx906 -fcuda-is-device \
+// RUN:   | FileCheck -check-prefix=DEV %s
 
 #include "Inputs/cuda.h"
 
Index: clang/test/CodeGenCUDA/kernel-args.cu
===================================================================
--- clang/test/CodeGenCUDA/kernel-args.cu
+++ clang/test/CodeGenCUDA/kernel-args.cu
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device \
-// RUN:     -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s
+// RUN:     -target-cpu gfx906 -emit-llvm %s -o - \
+// RUN:     | FileCheck -check-prefix=AMDGCN %s
 // RUN: %clang_cc1 -x cuda -triple nvptx64-nvidia-cuda- -fcuda-is-device \
 // RUN:     -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s
 #include "Inputs/cuda.h"
Index: clang/test/CodeGenCUDA/kernel-amdgcn.cu
===================================================================
--- clang/test/CodeGenCUDA/kernel-amdgcn.cu
+++ clang/test/CodeGenCUDA/kernel-amdgcn.cu
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx906 -fcuda-is-device \
+// RUN:    -emit-llvm -x hip %s -o - | FileCheck %s
 #include "Inputs/cuda.h"
 
 // CHECK: define{{.*}} amdgpu_kernel void @_ZN1A6kernelEv
Index: clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
===================================================================
--- clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
+++ clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \
 // RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefixes=CHECK,DEFAULT %s
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa --gpu-max-threads-per-block=1024 \
-// RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
+// RUN:     -target-cpu gfx906 -fcuda-is-device -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefixes=CHECK,MAX1024 %s
 // RUN: %clang_cc1 -triple nvptx \
 // RUN:     -fcuda-is-device -emit-llvm -o - %s | FileCheck %s \
Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
===================================================================
--- clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
+++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -1,9 +1,15 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=COMMON,OPT
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \
+// RUN:   -fcuda-is-device -emit-llvm -x hip %s -o - \
+// RUN:   | FileCheck --check-prefixes=COMMON,CHECK %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \
+// RUN:   -fcuda-is-device -emit-llvm -x hip %s \
+// RUN:   -disable-O0-optnone -o - | opt -S -O2 \
+// RUN:   | FileCheck %s --check-prefixes=COMMON,OPT
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN:   -x hip %s -o - | FileCheck -check-prefix=HOST %s
 
 #include "Inputs/cuda.h"
 
Index: clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu
===================================================================
--- clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu
+++ clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu
@@ -1,5 +1,5 @@
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -fcuda-is-device -emit-llvm -x hip -o - %s | FileCheck %s
 #include "Inputs/cuda.h"
 
 __global__ void hip_kernel_temp() {
Index: clang/lib/Sema/SemaDeclAttr.cpp
===================================================================
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -4428,6 +4428,11 @@
     }
     S.Diag(Method->getBeginLoc(), diag::warn_kern_is_method) << Method;
   }
+  if (S.getASTContext().getTargetInfo().getTargetOpts().CPU.empty() &&
+      S.getLangOpts().HIP && S.getLangOpts().CUDAIsDevice) {
+    S.Diag(FD->getBeginLoc(), diag::err_hip_kern_without_gpu);
+    return;
+  }
   // Only warn for "inline" when compiling for host, to cut down on noise.
   if (FD->isInlineSpecified() && !S.getLangOpts().CUDAIsDevice)
     S.Diag(FD->getBeginLoc(), diag::warn_kern_is_inline) << FD;
Index: clang/lib/Driver/ToolChains/HIP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/HIP.cpp
+++ clang/lib/Driver/ToolChains/HIP.cpp
@@ -293,8 +293,11 @@
 
   if (!BoundArch.empty()) {
     DAL->eraseArg(options::OPT_mcpu_EQ);
-    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch);
-    checkTargetID(*DAL);
+    if (BoundArch != "unknown") {
+      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ),
+                        BoundArch);
+      checkTargetID(*DAL);
+    }
   }
 
   return DAL;
@@ -355,7 +358,8 @@
 llvm::SmallVector<std::string, 12>
 HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
   llvm::SmallVector<std::string, 12> BCLibs;
-  if (DriverArgs.hasArg(options::OPT_nogpulib))
+  StringRef GpuArch = getGPUArch(DriverArgs);
+  if (DriverArgs.hasArg(options::OPT_nogpulib) || GpuArch.empty())
     return {};
   ArgStringList LibraryPaths;
 
@@ -386,7 +390,6 @@
       getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
       return {};
     }
-    StringRef GpuArch = getGPUArch(DriverArgs);
     assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
     (void)GpuArch;
     auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8256,6 +8256,9 @@
 def warn_kern_is_inline : Warning<
   "ignored 'inline' attribute on kernel function %0">,
   InGroup<CudaCompat>;
+def err_hip_kern_without_gpu : Error<
+  "compiling a HIP kernel without specifying an offload arch is not allowed">,
+  DefaultFatal;
 def err_variadic_device_fn : Error<
   "CUDA device code does not support variadic functions">;
 def err_va_arg_in_device : Error<
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to