llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-driver @llvm/pr-subscribers-clang Author: Yaxun (Sam) Liu (yxsamliu) <details> <summary>Changes</summary> wavefront size 64 is not fully supported for HIP on gfx10+ and it is not tested. As such, currently HIP header contains an pragma error to diagnose such use case by detecting predefined macro `__AMDGCN_WAVEFRONT_SIZE`. However, since clang will remove it and replace it with a builtin function, there is no good way to diagnose such invalid use of wavefront size 64 with gfx10+ in HIP header. Therefore this patch diagnoses uses of -mwavefrontsize64 with gfx10+ in HIPAMD toolchain. This patch also introduces option `-mforce-unsafe-wavefrontsize64` to allow power users to force wavefront size 64 on gfx10+, with an assumption that they understand its risk. --- Full diff: https://github.com/llvm/llvm-project/pull/140185.diff 7 Files Affected: - (modified) clang/include/clang/Basic/DiagnosticDriverKinds.td (+3) - (modified) clang/include/clang/Driver/Options.td (+2) - (modified) clang/lib/Driver/ToolChains/AMDGPU.cpp (+25-11) - (modified) clang/lib/Driver/ToolChains/AMDGPU.h (+7-2) - (modified) clang/lib/Driver/ToolChains/HIPAMD.cpp (+3) - (modified) clang/test/Driver/hip-macros.hip (+2-2) - (modified) clang/test/Driver/hip-toolchain-features.hip (+7-1) ``````````diff diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 4da8f80345ddc..42c6f11e5cbdb 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -200,6 +200,9 @@ def err_drv_opt_unsupported_input_type : Error< "'%0' invalid for input of type %1">; def err_drv_amdgpu_ieee_without_no_honor_nans : Error< "invalid argument '-mno-amdgpu-ieee' only allowed with relaxed NaN handling">; +def err_drv_wave64_requires_force_unsafe : Error< + "'-mwavefrontsize64' is not fully supported for HIP on AMD GPU architecture " + "gfx10 and above. Use '-mforce-unsafe-wavefrontsize64' to override">; def err_drv_argument_not_allowed_with : Error< "invalid argument '%0' not allowed with '%1'">; def err_drv_cannot_open_randomize_layout_seed_file : Error< diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index bd8df8f6a749a..41da9c3bff3bf 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5257,6 +5257,8 @@ defm tgsplit : SimpleMFlag<"tgsplit", "Enable", "Disable", defm wavefrontsize64 : SimpleMFlag<"wavefrontsize64", "Specify wavefront size 64", "Specify wavefront size 32", " mode (AMDGPU only)">; +defm force_unsafe_wavefrontsize64 : SimpleMFlag<"force-unsafe-wavefrontsize64", + "Force", "Do not force", " wavefront size 64 for gfx10+ GPUs (AMDGPU only)">; defm amdgpu_precise_memory_op : SimpleMFlag<"amdgpu-precise-memory-op", "Enable", "Disable", " precise memory mode (AMDGPU only)">; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 35ca019795ddc..d7ce1ca1ca4e7 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -606,6 +606,7 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D, // Add target ID features to -target-feature options. No diagnostics should // be emitted here since invalid target ID is diagnosed at other places. StringRef TargetID; + StringRef GpuArch; if (Args.hasArg(options::OPT_mcpu_EQ)) TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ); else if (Args.hasArg(options::OPT_march_EQ)) @@ -614,7 +615,7 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D, llvm::StringMap<bool> FeatureMap; auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap); if (OptionalGpuArch) { - StringRef GpuArch = *OptionalGpuArch; + GpuArch = *OptionalGpuArch; // Iterate through all possible target ID features for the given GPU. // If it is mapped to true, add +feature. // If it is mapped to false, add -feature. @@ -628,9 +629,10 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D, } } } - - if (Args.hasFlag(options::OPT_mwavefrontsize64, - options::OPT_mno_wavefrontsize64, false)) + if (toolchains::AMDGPUToolChain::isWave64( + D, Args, llvm::AMDGPU::parseArchAMDGCN(GpuArch), + /*DiagInvalid=*/false, + /*Explicit=*/true)) Features.push_back("+wavefrontsize64"); if (Args.hasFlag(options::OPT_mamdgpu_precise_memory_op, @@ -767,15 +769,27 @@ llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( llvm::DenormalMode::getIEEE(); } -bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs, - llvm::AMDGPU::GPUKind Kind) { +bool AMDGPUToolChain::isWave64(const Driver &D, + const llvm::opt::ArgList &DriverArgs, + llvm::AMDGPU::GPUKind Kind, bool DiagInvalid, + bool Explicit) { const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind); bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32); - return !HasWave32 || DriverArgs.hasFlag( - options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false); -} + bool EnableWave64 = DriverArgs.hasFlag( + options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false); + bool ForceUnsafeWave64 = + DriverArgs.hasFlag(options::OPT_mforce_unsafe_wavefrontsize64, + options::OPT_mno_force_unsafe_wavefrontsize64, false); + + if (DiagInvalid && HasWave32 && EnableWave64 && !ForceUnsafeWave64) + D.Diag(diag::err_drv_wave64_requires_force_unsafe); + if (Explicit) + return EnableWave64 || ForceUnsafeWave64; + + return !HasWave32 || EnableWave64 || ForceUnsafeWave64; +} /// ROCM Toolchain ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple, @@ -884,7 +898,7 @@ void ROCMToolChain::addClangTargetOptions( ABIVer)) return; - bool Wave64 = isWave64(DriverArgs, Kind); + bool Wave64 = isWave64(getDriver(), DriverArgs, Kind); // TODO: There are way too many flags that change this. Do we need to check // them all? bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || @@ -1012,7 +1026,7 @@ ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, bool CorrectSqrt = DriverArgs.hasFlag( options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true); - bool Wave64 = isWave64(DriverArgs, Kind); + bool Wave64 = isWave64(getDriver(), DriverArgs, Kind); // GPU Sanitizer currently only supports ASan and is enabled through host // ASan. diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h index 08bd4fa556f78..d9d0fac71eb91 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -89,8 +89,13 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF { const llvm::opt::ArgList &DriverArgs, const JobAction &JA, const llvm::fltSemantics *FPType = nullptr) const override; - static bool isWave64(const llvm::opt::ArgList &DriverArgs, - llvm::AMDGPU::GPUKind Kind); + /// Return whether the GPU of kind \p Kind has wavefront size 64 by driver + /// arguments \p DriverArgs. When \p Explicit is true, only return true + /// if wavefront size is set to 64 by arguments explicitly. When + /// \p DiagInvalid is true, diagnose invalid -mwavefrontsize64 arguments. + static bool isWave64(const Driver &D, const llvm::opt::ArgList &DriverArgs, + llvm::AMDGPU::GPUKind Kind, bool DiagInvalid = false, + bool Explicit = false); /// Needed for using lto. bool HasNativeLLVMSupport() const override { return true; diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index abda4eb453387..44f1a3e915283 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -229,6 +229,9 @@ void HIPAMDToolChain::addClangTargetOptions( assert(DeviceOffloadingKind == Action::OFK_HIP && "Only HIP offloading kinds are supported for GPUs."); + const StringRef GpuArch = getGPUArch(DriverArgs); + auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); + (void)isWave64(getDriver(), DriverArgs, Kind, /*DiagInvalid=*/true); CC1Args.append({"-fcuda-is-device", "-fno-threadsafe-statics"}); diff --git a/clang/test/Driver/hip-macros.hip b/clang/test/Driver/hip-macros.hip index bd93f9985a774..37fdafc53af85 100644 --- a/clang/test/Driver/hip-macros.hip +++ b/clang/test/Driver/hip-macros.hip @@ -2,7 +2,7 @@ // RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \ // RUN: --cuda-device-only -nogpuinc -nogpulib \ // RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s -// RUN: %clang -E -dM --offload-arch=gfx1010 -mwavefrontsize64 \ +// RUN: %clang -E -dM --offload-arch=gfx1010 -mforce-unsafe-wavefrontsize64 \ // RUN: --cuda-device-only -nogpuinc -nogpulib \ // RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s // RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \ @@ -16,7 +16,7 @@ // RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s // RUN: %clang -E -dM --offload-arch=gfx1010 -mno-wavefrontsize64 \ // RUN: --cuda-device-only -nogpuinc -nogpulib \ -// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: -mforce-unsafe-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s // WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE__ 64 // WAVE32-DAG: #define __AMDGCN_WAVEFRONT_SIZE__ 32 // WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 diff --git a/clang/test/Driver/hip-toolchain-features.hip b/clang/test/Driver/hip-toolchain-features.hip index dbc007ac1335b..1de21d9a0ff06 100644 --- a/clang/test/Driver/hip-toolchain-features.hip +++ b/clang/test/Driver/hip-toolchain-features.hip @@ -67,9 +67,15 @@ // DUP-NOT: "-target-feature" "{{.*}}wavefrontsize64" // DUP: {{.*}}lld{{.*}} "-plugin-opt=-mattr=+cumode" -// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \ +// RUN: not %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \ // RUN: -nogpuinc --offload-arch=gfx1010 --no-offload-new-driver %s \ // RUN: -mno-wavefrontsize64 -mwavefrontsize64 2>&1 \ +// RUN: | FileCheck %s -check-prefix=WAVE64-ERR +// WAVE64-ERR: error: '-mwavefrontsize64' is not fully supported for HIP on AMD GPU architecture gfx10 and above. Use '-mforce-unsafe-wavefrontsize64' to override + +// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \ +// RUN: -nogpuinc --offload-arch=gfx1010 --no-offload-new-driver %s \ +// RUN: -mno-wavefrontsize64 -mforce-unsafe-wavefrontsize64 2>&1 \ // RUN: | FileCheck %s -check-prefix=WAVE64 // WAVE64: {{.*}}clang{{.*}} "-target-feature" "+wavefrontsize64" // WAVE64-NOT: "-target-feature" "{{.*}}wavefrontsize16" `````````` </details> https://github.com/llvm/llvm-project/pull/140185 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits