saiislam updated this revision to Diff 264529. saiislam added a comment. Removed isOpenMPGPU() to avoid defining OpenMP compatibility of an architecture. Reverting back to explicitly checking NVPTX and AMDGCN architectures. Also, split handling of NVPTX's and AMDGCN's handling of getBuiltinID. For AMDGCN it now uses OpenMPIsDevice LangOpt and returns 0 for every device library function, except for printf and malloc.
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D79754/new/ https://reviews.llvm.org/D79754 Files: clang/lib/AST/Decl.cpp clang/lib/Frontend/CompilerInvocation.cpp clang/test/Driver/openmp-offload-gpu.c clang/test/OpenMP/target_parallel_no_exceptions.cpp llvm/include/llvm/ADT/Triple.h
Index: llvm/include/llvm/ADT/Triple.h =================================================================== --- llvm/include/llvm/ADT/Triple.h +++ llvm/include/llvm/ADT/Triple.h @@ -692,6 +692,9 @@ return getArch() == Triple::nvptx || getArch() == Triple::nvptx64; } + /// Tests whether the target is AMDGCN + bool isAMDGCN() const { return getArch() == Triple::amdgcn; } + bool isAMDGPU() const { return getArch() == Triple::r600 || getArch() == Triple::amdgcn; } Index: clang/test/OpenMP/target_parallel_no_exceptions.cpp =================================================================== --- clang/test/OpenMP/target_parallel_no_exceptions.cpp +++ clang/test/OpenMP/target_parallel_no_exceptions.cpp @@ -1,6 +1,7 @@ /// Make sure no exception messages are inclided in the llvm output. // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHK-EXCEPTION +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHK-EXCEPTION void test_increment() { #pragma omp target Index: clang/test/Driver/openmp-offload-gpu.c =================================================================== --- clang/test/Driver/openmp-offload-gpu.c +++ clang/test/Driver/openmp-offload-gpu.c @@ -6,6 +6,7 @@ // REQUIRES: x86-registered-target // REQUIRES: powerpc-registered-target // REQUIRES: nvptx-registered-target +// REQUIRES: amdgpu-registered-target /// ########################################################################### @@ -249,30 +250,49 @@ // HAS_DEBUG-SAME: "--return-at-end" // HAS_DEBUG: nvlink // HAS_DEBUG-SAME: "-g" +// CUDA_MODE: clang{{.*}}"-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}" +// CUDA_MODE-SAME: "-fopenmp-cuda-mode" +// NO_CUDA_MODE-NOT: "-{{fno-|f}}openmp-cuda-mode" // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode 2>&1 \ // RUN: | FileCheck -check-prefix=CUDA_MODE %s // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \ // RUN: | FileCheck -check-prefix=CUDA_MODE %s -// CUDA_MODE: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda" -// CUDA_MODE-SAME: "-fopenmp-cuda-mode" // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode 2>&1 \ // RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \ // RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s -// NO_CUDA_MODE-NOT: "-{{fno-|f}}openmp-cuda-mode" + +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=CUDA_MODE %s +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=CUDA_MODE %s +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s + +// FULL_RUNTIME: clang{{.*}}"-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}" +// FULL_RUNTIME-SAME: "-fopenmp-cuda-force-full-runtime" +// NO_FULL_RUNTIME-NOT: "-{{fno-|f}}openmp-cuda-force-full-runtime" // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-force-full-runtime 2>&1 \ // RUN: | FileCheck -check-prefix=FULL_RUNTIME %s // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-force-full-runtime -fopenmp-cuda-force-full-runtime 2>&1 \ // RUN: | FileCheck -check-prefix=FULL_RUNTIME %s -// FULL_RUNTIME: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda" -// FULL_RUNTIME-SAME: "-fopenmp-cuda-force-full-runtime" // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-force-full-runtime 2>&1 \ // RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-force-full-runtime -fno-openmp-cuda-force-full-runtime 2>&1 \ // RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s -// NO_FULL_RUNTIME-NOT: "-{{fno-|f}}openmp-cuda-force-full-runtime" + +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-force-full-runtime 2>&1 \ +// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-force-full-runtime -fopenmp-cuda-force-full-runtime 2>&1 \ +// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-force-full-runtime 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-force-full-runtime -fno-openmp-cuda-force-full-runtime 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-teams-reduction-recs-num=2048 2>&1 \ // RUN: | FileCheck -check-prefix=CUDA_RED_RECS %s Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -3098,7 +3098,8 @@ // Set the flag to prevent the implementation from emitting device exception // handling code for those requiring so. - if ((Opts.OpenMPIsDevice && T.isNVPTX()) || Opts.OpenCLCPlusPlus) { + if ((Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN())) || + Opts.OpenCLCPlusPlus) { Opts.Exceptions = 0; Opts.CXXExceptions = 0; } @@ -3132,6 +3133,7 @@ TT.getArch() == llvm::Triple::ppc64le || TT.getArch() == llvm::Triple::nvptx || TT.getArch() == llvm::Triple::nvptx64 || + TT.getArch() == llvm::Triple::amdgcn || TT.getArch() == llvm::Triple::x86 || TT.getArch() == llvm::Triple::x86_64)) Diags.Report(diag::err_drv_invalid_omp_target) << A->getValue(i); @@ -3149,13 +3151,13 @@ << Opts.OMPHostIRFile; } - // Set CUDA mode for OpenMP target NVPTX if specified in options - Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && T.isNVPTX() && + // Set CUDA mode for OpenMP target NVPTX/AMDGCN if specified in options + Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN()) && Args.hasArg(options::OPT_fopenmp_cuda_mode); - // Set CUDA mode for OpenMP target NVPTX if specified in options + // Set CUDA mode for OpenMP target NVPTX/AMDGCN if specified in options Opts.OpenMPCUDAForceFullRuntime = - Opts.OpenMPIsDevice && T.isNVPTX() && + Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN()) && Args.hasArg(options::OPT_fopenmp_cuda_force_full_runtime); // Record whether the __DEPRECATED define was requested. Index: clang/lib/AST/Decl.cpp =================================================================== --- clang/lib/AST/Decl.cpp +++ clang/lib/AST/Decl.cpp @@ -3221,6 +3221,14 @@ !(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc)) return 0; + // HIP does not have device-side standard library. printf and malloc are + // the only special cases that are supported by device-side runtime. + if (Context.getTargetInfo().getTriple().isAMDGCN() && + Context.getLangOpts().OpenMPIsDevice && + Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID) && + !(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc)) + return 0; + return BuiltinID; }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits