yaxunl created this revision. yaxunl added reviewers: tra, arsenm. Herald added subscribers: kerbowa, nhaehnle, jvesely. yaxunl requested review of this revision. Herald added a subscriber: wdng.
Because `HasWave32` in `AMDGPUToolChain::isWave64` is declared `static`, it is initialized only once and keeps the same value for every GPU arch, whereas the correct value may differ between GPU archs, e.g. when compiling for both gfx900 and gfx1010. Removing `static` fixes that. https://reviews.llvm.org/D92628 Files: clang/lib/Driver/ToolChains/AMDGPU.cpp clang/test/Driver/hip-wavefront-size.hip Index: clang/test/Driver/hip-wavefront-size.hip =================================================================== --- /dev/null +++ clang/test/Driver/hip-wavefront-size.hip @@ -0,0 +1,21 @@ +// REQUIRES: clang-driver,amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: --rocm-path=%S/Inputs/rocm --cuda-device-only %s \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=WAVE64 +// WAVE64: "-mlink-builtin-bitcode" "{{.*}}oclc_wavefrontsize64_on.bc"{{.*}} "-target-cpu" "gfx900" + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx1010 \ +// RUN: --rocm-path=%S/Inputs/rocm --cuda-device-only %s \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=WAVE32 +// WAVE32: "-mlink-builtin-bitcode" "{{.*}}oclc_wavefrontsize64_off.bc"{{.*}} "-target-cpu" "gfx1010" + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx1010 \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: --rocm-path=%S/Inputs/rocm --cuda-device-only %s \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=BOTH +// BOTH-DAG: "-mlink-builtin-bitcode" "{{.*}}oclc_wavefrontsize64_on.bc"{{.*}} "-target-cpu" "gfx900" +// BOTH-DAG: "-mlink-builtin-bitcode" "{{.*}}oclc_wavefrontsize64_off.bc"{{.*}} "-target-cpu" "gfx1010" Index: clang/lib/Driver/ToolChains/AMDGPU.cpp =================================================================== --- clang/lib/Driver/ToolChains/AMDGPU.cpp +++ clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -499,7 +499,7 @@ bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs, llvm::AMDGPU::GPUKind Kind) { const unsigned ArchAttr = 
llvm::AMDGPU::getArchAttrAMDGCN(Kind); - static bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32); + bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32); return !HasWave32 || DriverArgs.hasFlag( options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
Index: clang/test/Driver/hip-wavefront-size.hip =================================================================== --- /dev/null +++ clang/test/Driver/hip-wavefront-size.hip @@ -0,0 +1,21 @@ +// REQUIRES: clang-driver,amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: --rocm-path=%S/Inputs/rocm --cuda-device-only %s \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=WAVE64 +// WAVE64: "-mlink-builtin-bitcode" "{{.*}}oclc_wavefrontsize64_on.bc"{{.*}} "-target-cpu" "gfx900" + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx1010 \ +// RUN: --rocm-path=%S/Inputs/rocm --cuda-device-only %s \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=WAVE32 +// WAVE32: "-mlink-builtin-bitcode" "{{.*}}oclc_wavefrontsize64_off.bc"{{.*}} "-target-cpu" "gfx1010" + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx1010 \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: --rocm-path=%S/Inputs/rocm --cuda-device-only %s \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=BOTH +// BOTH-DAG: "-mlink-builtin-bitcode" "{{.*}}oclc_wavefrontsize64_on.bc"{{.*}} "-target-cpu" "gfx900" +// BOTH-DAG: "-mlink-builtin-bitcode" "{{.*}}oclc_wavefrontsize64_off.bc"{{.*}} "-target-cpu" "gfx1010" Index: clang/lib/Driver/ToolChains/AMDGPU.cpp =================================================================== --- clang/lib/Driver/ToolChains/AMDGPU.cpp +++ clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -499,7 +499,7 @@ bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs, llvm::AMDGPU::GPUKind Kind) { const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind); - static bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32); + bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32); return !HasWave32 || DriverArgs.hasFlag( options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits