MI210 is xnack disabled by default. Compile with :xnack+ will produce GPU kernels that cannot be run on MI210.
Two targets, gfx90a_xnack and gfx90a_noxnack are introduced to replace gfx90a, for the following reason: Upstream usually ships 2 version: gfx90a:xnack-, gfx90a:xnack+. Although a single gfx90a should have maximum compatibility, According to [1,2], compile with xnack+/xnack- may have better performance on xnack enabled/disabled GPUs. Therefore we ship both the target, align with upstream. gfx900 is also appended with :xnack- to align with upstream default. However, some ROCm libraries does not need xnack feature flag, such as sci-libs/miopen and sci-libs/rocFFT, so "--no-xnack-flag" option is introduced to stop appending xnack feature. Demonstrate this option in the second example ebuild. Update description file profiles/desc/amdgpu_targets.desc accordingly. Github PR: https://github.com/gentoo/gentoo/pull/33861 [1] https://llvm.org/docs/AMDGPUUsage.html#target-features [2] https://docs.olcf.ornl.gov/systems/crusher_quick_start_guide.html#compiling-hip-kernels-for-specific-xnack-modes Signed-off-by: Yiyang Wu <xgreenlandfor...@gmail.com> --- eclass/rocm.eclass | 31 ++++++++++++++++++++++--------- profiles/desc/amdgpu_targets.desc | 5 ++++- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/eclass/rocm.eclass b/eclass/rocm.eclass index b78dfea1cc31..3209c003e323 100644 --- a/eclass/rocm.eclass +++ b/eclass/rocm.eclass @@ -60,7 +60,8 @@ # # Examples for packages depend on ROCm libraries -- a package which depends on # rocBLAS, uses comma separated ${HCC_AMDGPU_TARGET} to determine GPU -# architectures, and requires ROCm version >=5.1 +# architectures which does not accept :xnack feature flag, and requires ROCm +# version >=5.1 # @CODE # ROCM_VERSION=5.1 # inherit rocm @@ -71,7 +72,7 @@ # # src_configure() { # if use rocm; then -# local amdgpu_flags=$(get_amdgpu_flags) +# local amdgpu_flags=$(get_amdgpu_flags --no-xnack-flag) # export HCC_AMDGPU_TARGET=${amdgpu_flags//;/,} # fi # default @@ -143,7 +144,7 @@ _rocm_set_globals() { gfx803 gfx900 gfx1010 gfx1011 gfx1012 gfx1031 ) official_amdgpu_targets=( - gfx906 gfx908 gfx90a gfx1030 + gfx906 gfx908 gfx90a_xnack gfx90a_noxnack gfx1030 ) ;; 5.*|9999) @@ -152,7 +153,7 @@ _rocm_set_globals() { gfx1031 gfx1100 gfx1101 gfx1102 ) official_amdgpu_targets=( - gfx906 gfx908 gfx90a gfx1030 + gfx906 gfx908 gfx90a_xnack gfx90a_noxnack gfx1030 ) ;; *) @@ -181,26 +182,38 @@ unset -f _rocm_set_globals # @FUNCTION: get_amdgpu_flags -# @USAGE: get_amdgpu_flags +# @USAGE: get_amdgpu_flags [--no-xnack-flag] # @DESCRIPTION: # Convert specified use flag of amdgpu_targets to compilation flags. -# Append default target feature to GPU arch. See +# Append default target feature xnack to GPU arch. See # https://llvm.org/docs/AMDGPUUsage.html#target-features +# If specified with --no-xnack-flag, do not append xnack feature flag. get_amdgpu_flags() { local amdgpu_target_flags for gpu_target in ${AMDGPU_TARGETS}; do local target_feature= case ${gpu_target} in - gfx906|gfx908) + gfx900|gfx906|gfx908) + # These GPUs ususally does not enable xnack, so + # disabling xnack generates faster GPU kernels. target_feature=:xnack- ;; - gfx90a) + gfx90a_noxnack) + gpu_target=gfx90a + target_feature=:xnack- + ;; + gfx90a_xnack) + gpu_target=gfx90a target_feature=:xnack+ ;; *) ;; esac - amdgpu_target_flags+="${gpu_target}${target_feature};" + if [[ "$1" == "--no-xnack-flag" ]]; then + amdgpu_target_flags+="${gpu_target};" + else + amdgpu_target_flags+="${gpu_target}${target_feature};" + fi done echo "${amdgpu_target_flags}" } diff --git a/profiles/desc/amdgpu_targets.desc b/profiles/desc/amdgpu_targets.desc index 9c5739e9d9a4..77ffa43399f1 100644 --- a/profiles/desc/amdgpu_targets.desc +++ b/profiles/desc/amdgpu_targets.desc @@ -4,12 +4,15 @@ # Reference: # GPU name and Architecture codename: https://github.com/GPUOpen-Tools/device_info/blob/master/DeviceInfo.cpp # See also: https://www.coelacanth-dream.com/posts/2019/12/30/did-rid-product-matome-p2/#fn:67 +# xnack feature reference: https://llvm.org/docs/AMDGPUUsage.html#target-features +# See also: https://wiki.gentoo.org/wiki/ROCm#XNACK_target_feature gfx803 - Fiji GPU, codename fiji, including Radeon R9 Nano/Fury/FuryX, Radeon Pro Duo, FirePro S9300x2, Radeon Instinct MI8 gfx900 - Vega GPU, codename vega10, including Radeon Vega Frontier Edition, Radeon RX Vega 56/64, Radeon RX Vega 64 Liquid, Radeon Pro Vega 48/56/64/64X, Radeon Pro WX 8200/9100, Radeon Pro V320/V340/SSG, Radeon Instinct MI25 gfx906 - Vega GPU, codename vega20, including Radeon (Pro) VII, Radeon Instinct MI50/MI60 gfx908 - CDNA Accelerator, codename arcturus, including AMD Instinct MI100 Accelerator -gfx90a - CDNA2 Accelerator, codename aldebaran, including AMD Instinct MI200 series Accelerators +gfx90a_xnack - CDNA2: same as gfx90a_noxnack, with xnack enabled. Compatible with xnack enabled GPU state. +gfx90a_noxnack - CDNA2 Accelerator, codename aldebaran, including AMD Instinct MI200 series Accelerators, compatible with xnack disabled GPU state. gfx1010 - RDNA GPU, codename navi10, including Radeon RX 5700XT/5700/5700M/5700B/5700XTB/5600XT/5600/5600M, Radeon Pro 5700XT/5700, Radeon Pro W5700X/W5700 gfx1011 - RDNA GPU, codename navi12, including Radeon Pro 5600M/V520 gfx1012 - RDNA GPU, codename navi14, including Radeon RX 5500XT/5500/5500M/5500XTB/5300/5300M, Radeon Pro 5500XT/5500M/5300/5300M, Radeon Pro W5500X/W5500/W5500M/W5300M -- 2.41.0