https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/114481
>From 3ba88ce598aaab269169f0a5db5981c9a9ac8603 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Thu, 31 Oct 2024 22:38:36 +0000 Subject: [PATCH 1/4] Add pass to handle AMDGCN pseudo-intrinsics (abstract placeholders for target specific info), and add handling for `llvm.amdgcn.wavefrontsize`. --- clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 5 +- llvm/lib/Target/AMDGPU/AMDGPU.h | 9 ++ .../AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp | 49 +++++++++ llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 2 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 +- llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 + .../AMDGPU/llvm.amdgcn.wavefrontsize.ll | 99 ++++++++++++++----- 7 files changed, 139 insertions(+), 29 deletions(-) create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index bf5f2971cf118c..de6a06dad6a08d 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -1,6 +1,6 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu tahiti -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefixes=CHECK-AMDGCN,CHECK %s -// RUN: %clang_cc1 -cl-std=CL2.0 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefix=CHECK %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefixes=CHECK,CHECK-SPIRV %s #pragma OPENCL EXTENSION cl_khr_fp64 : enable @@ -866,7 +866,8 @@ void test_atomic_inc_dec(__attribute__((address_space(3))) uint *lptr, __attribu // CHECK-LABEL test_wavefrontsize( unsigned test_wavefrontsize() { - // CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wavefrontsize() + // CHECK-AMDGCN: ret i32 {{[0-9]+}} + // CHECK-SPIRV: {{.*}}call{{.*}} i32 @llvm.amdgcn.wavefrontsize() return __builtin_amdgcn_wavefrontsize(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 95d0ad0f9dc96a..17d3e6ab7c65ab 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -345,6 +345,15 @@ extern char &AMDGPUPrintfRuntimeBindingID; void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &); extern char &AMDGPUResourceUsageAnalysisID; +struct AMDGPUExpandPseudoIntrinsicsPass + : PassInfoMixin<AMDGPUExpandPseudoIntrinsicsPass> { + const AMDGPUTargetMachine &TM; + AMDGPUExpandPseudoIntrinsicsPass(const AMDGPUTargetMachine &ATM) : TM(ATM) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + + static bool isRequired() { return true; } +}; + struct AMDGPUPrintfRuntimeBindingPass : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp b/llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp new file mode 100644 index 00000000000000..faa23bb8550dbc --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp @@ -0,0 +1,49 @@ +//===- AMDGPUExpandPseudoIntrinsics.cpp - Pseudo Intrinsic Expander Pass --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file implements a pass that deals with expanding AMDGCN generic pseudo- +// intrinsics into target specific quantities / sequences. In this context, a +// pseudo-intrinsic is an AMDGCN intrinsic that does not directly map to a +// specific instruction, but rather is intended as a mechanism for abstractly +// conveying target specific info to a HLL / the FE, without concretely +// impacting the AST. An example of such an intrinsic is amdgcn.wavefrontsize. +// This pass should run as early as possible / immediately after Clang CodeGen, +// so that the optimisation pipeline and the BE operate with concrete target +// data. +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUTargetMachine.h" +#include "GCNSubtarget.h" + +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" + +using namespace llvm; + +static inline PreservedAnalyses expandWaveSizeIntrinsic(const GCNSubtarget &ST, + Function *WaveSize) { + if (WaveSize->hasZeroLiveUses()) + return PreservedAnalyses::all(); + + for (auto &&U : WaveSize->users()) + U->replaceAllUsesWith(ConstantInt::get(WaveSize->getReturnType(), + ST.getWavefrontSize())); + + return PreservedAnalyses::none(); +} + +PreservedAnalyses + AMDGPUExpandPseudoIntrinsicsPass::run(Module &M, ModuleAnalysisManager &) { + + if (auto WS = M.getFunction("llvm.amdgcn.wavefrontsize")) + return expandWaveSizeIntrinsic(TM.getSubtarget<GCNSubtarget>(*WS), WS); + + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 174a90f0aa419d..323c195c329168 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -27,6 +27,8 @@ MODULE_PASS("amdgpu-perf-hint", *static_cast<const GCNTargetMachine *>(this))) MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass()) MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass()) +MODULE_PASS("amdgpu-expand-pseudo-intrinsics", + AMDGPUExpandPseudoIntrinsicsPass(*this)) #undef MODULE_PASS #ifndef MODULE_PASS_WITH_PARAMS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index d93ec34a703d3d..2bf8df6588c59c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -739,7 +739,8 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { #include "llvm/Passes/TargetPassRegistry.inc" PB.registerPipelineStartEPCallback( - [](ModulePassManager &PM, OptimizationLevel Level) { + [this](ModulePassManager &PM, OptimizationLevel Level) { + PM.addPass(AMDGPUExpandPseudoIntrinsicsPass(*this)); FunctionPassManager FPM; PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); if (EnableHipStdPar) diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index fed29c3e14aae2..c9d4452b4a035c 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -54,6 +54,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPUCodeGenPrepare.cpp AMDGPUCombinerHelper.cpp AMDGPUCtorDtorLowering.cpp + AMDGPUExpandPseudoIntrinsics.cpp AMDGPUExportClustering.cpp AMDGPUFrameLowering.cpp AMDGPUGlobalISelDivergenceLowering.cpp diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll index 824d3708c027db..efa53def5ee686 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W32 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s @@ -5,28 +6,43 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s ; RUN: opt -O3 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefix=OPT-W64 %s +; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT-W32 %s +; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT-W32 %s +; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT-W64 %s +; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefix=OPT-W64 %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT-W32 %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT-W64 %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT-W32 %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT-W64 %s ; GCN-LABEL: {{^}}fold_wavefrontsize: -; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize( ; W32: v_mov_b32_e32 [[V:v[0-9]+]], 32 ; W64: v_mov_b32_e32 [[V:v[0-9]+]], 64 ; GCN: store_{{dword|b32}} v{{.+}}, [[V]] -; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() -; OPT: store i32 %tmp, ptr addrspace(1) %arg, align 4 -; OPT-NEXT: ret void define amdgpu_kernel void @fold_wavefrontsize(ptr addrspace(1) nocapture %arg) { +; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize( +; OPT-SAME: ptr addrspace(1) nocapture writeonly [[ARG:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; OPT-NEXT: [[BB:.*:]] +; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR2:[0-9]+]] +; OPT-NEXT: store i32 [[TMP]], ptr addrspace(1) [[ARG]], align 4 +; OPT-NEXT: ret void +; +; OPT-W64-LABEL: define amdgpu_kernel void @fold_wavefrontsize( +; OPT-W64-SAME: ptr addrspace(1) nocapture writeonly [[ARG:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; OPT-W64-NEXT: [[BB:.*:]] +; OPT-W64-NEXT: store i32 64, ptr addrspace(1) [[ARG]], align 4 +; OPT-W64-NEXT: ret void +; +; OPT-W32-LABEL: define amdgpu_kernel void @fold_wavefrontsize( +; OPT-W32-SAME: ptr addrspace(1) nocapture writeonly [[ARG:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; OPT-W32-NEXT: [[BB:.*:]] +; OPT-W32-NEXT: store i32 32, ptr addrspace(1) [[ARG]], align 4 +; OPT-W32-NEXT: ret void +; bb: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0 store i32 %tmp, ptr addrspace(1) %arg, align 4 @@ -34,20 +50,35 @@ bb: } ; GCN-LABEL: {{^}}fold_and_optimize_wavefrontsize: -; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize( ; W32: v_mov_b32_e32 [[V:v[0-9]+]], 1{{$}} ; W64: v_mov_b32_e32 [[V:v[0-9]+]], 2{{$}} ; GCN-NOT: cndmask ; GCN: store_{{dword|b32}} v{{.+}}, [[V]] -; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() -; OPT: %tmp1 = icmp ugt i32 %tmp, 32 -; OPT: %tmp2 = select i1 %tmp1, i32 2, i32 1 -; OPT: store i32 %tmp2, ptr addrspace(1) %arg -; OPT-NEXT: ret void define amdgpu_kernel void @fold_and_optimize_wavefrontsize(ptr addrspace(1) nocapture %arg) { +; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize( +; OPT-SAME: ptr addrspace(1) nocapture writeonly [[ARG:%.*]]) local_unnamed_addr #[[ATTR0]] { +; OPT-NEXT: [[BB:.*:]] +; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR2]] +; OPT-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP]], 32 +; OPT-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 1 +; OPT-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARG]], align 4 +; OPT-NEXT: ret void +; +; OPT-W64-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize( +; OPT-W64-SAME: ptr addrspace(1) nocapture writeonly [[ARG:%.*]]) local_unnamed_addr #[[ATTR0]] { +; OPT-W64-NEXT: [[BB:.*:]] +; OPT-W64-NEXT: store i32 2, ptr addrspace(1) [[ARG]], align 4 +; OPT-W64-NEXT: ret void +; +; OPT-W32-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize( +; OPT-W32-SAME: ptr addrspace(1) nocapture writeonly [[ARG:%.*]]) local_unnamed_addr #[[ATTR0]] { +; OPT-W32-NEXT: [[BB:.*:]] +; OPT-W32-NEXT: store i32 1, ptr addrspace(1) [[ARG]], align 4 +; OPT-W32-NEXT: ret void +; bb: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0 %tmp1 = icmp ugt i32 %tmp, 32 @@ -57,15 +88,31 @@ bb: } ; GCN-LABEL: {{^}}fold_and_optimize_if_wavefrontsize: -; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize( - -; OPT: bb: -; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() -; OPT: %tmp1 = icmp ugt i32 %tmp, 32 -; OPT: bb3: -; OPT-NEXT: ret void define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(ptr addrspace(1) nocapture %arg) { +; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize( +; OPT-SAME: ptr addrspace(1) nocapture writeonly [[ARG:%.*]]) local_unnamed_addr #[[ATTR0]] { +; OPT-NEXT: [[BB:.*:]] +; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR2]] +; OPT-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP]], 32 +; OPT-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]] +; OPT: [[BB2]]: +; OPT-NEXT: store i32 1, ptr addrspace(1) [[ARG]], align 4 +; OPT-NEXT: br label %[[BB3]] +; OPT: [[BB3]]: +; OPT-NEXT: ret void +; +; OPT-W64-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize( +; OPT-W64-SAME: ptr addrspace(1) nocapture writeonly [[ARG:%.*]]) local_unnamed_addr #[[ATTR0]] { +; OPT-W64-NEXT: [[BB:.*:]] +; OPT-W64-NEXT: store i32 1, ptr addrspace(1) [[ARG]], align 4 +; OPT-W64-NEXT: ret void +; +; OPT-W32-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize( +; OPT-W32-SAME: ptr addrspace(1) nocapture readnone [[ARG:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +; OPT-W32-NEXT: [[BB:.*:]] +; OPT-W32-NEXT: ret void +; bb: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0 %tmp1 = icmp ugt i32 %tmp, 32 >From 826c291f59f05cb7065dceb6052f3d8b7bf33f57 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Fri, 1 Nov 2024 01:01:19 +0000 Subject: [PATCH 2/4] Implement review feedback. --- .../AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp b/llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp index faa23bb8550dbc..b46097bbd33e99 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" @@ -33,17 +34,22 @@ static inline PreservedAnalyses expandWaveSizeIntrinsic(const GCNSubtarget &ST, return PreservedAnalyses::all(); for (auto &&U : WaveSize->users()) - U->replaceAllUsesWith(ConstantInt::get(WaveSize->getReturnType(), - ST.getWavefrontSize())); + U->replaceAllUsesWith( + ConstantInt::get(WaveSize->getReturnType(), ST.getWavefrontSize())); return PreservedAnalyses::none(); } PreservedAnalyses - AMDGPUExpandPseudoIntrinsicsPass::run(Module &M, ModuleAnalysisManager &) { +AMDGPUExpandPseudoIntrinsicsPass::run(Module &M, ModuleAnalysisManager &) { + if (M.empty()) + return PreservedAnalyses::all(); + + const auto &ST = TM.getSubtarget<GCNSubtarget>(*M.begin()); - if (auto WS = M.getFunction("llvm.amdgcn.wavefrontsize")) - return expandWaveSizeIntrinsic(TM.getSubtarget<GCNSubtarget>(*WS), WS); + if (auto WS = + Intrinsic::getDeclarationIfExists(&M, Intrinsic::amdgcn_wavefrontsize)) + return expandWaveSizeIntrinsic(ST, WS); return PreservedAnalyses::all(); } >From ab6f5a22a2442468f2ef0a7f18239f858b6320b7 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Fri, 1 Nov 2024 02:02:08 +0000 Subject: [PATCH 3/4] Do not fold early for `generic` mcpu. --- llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp | 4 ++++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp b/llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp index b46097bbd33e99..fb2ef7b7ed2d71 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp @@ -47,6 +47,10 @@ AMDGPUExpandPseudoIntrinsicsPass::run(Module &M, ModuleAnalysisManager &) { const auto &ST = TM.getSubtarget<GCNSubtarget>(*M.begin()); + // This is not a concrete target, we should not fold early. + if (ST.getCPU().empty() || ST.getCPU() == "generic") + return PreservedAnalyses::all(); + if (auto WS = Intrinsic::getDeclarationIfExists(&M, Intrinsic::amdgcn_wavefrontsize)) return expandWaveSizeIntrinsic(ST, WS); diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll index efa53def5ee686..2d060fd4305077 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll @@ -6,10 +6,10 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s ; RUN: opt -O3 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefix=OPT-W64 %s -; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT-W32 %s -; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT-W32 %s -; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT-W64 %s +; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s ; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefix=OPT-W64 %s ; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT-W32 %s ; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT-W64 %s >From f8705fbe9f9c78148ca0a0360caf2650ab546185 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Fri, 1 Nov 2024 02:07:03 +0000 Subject: [PATCH 4/4] Fix formatting (again). --- llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp b/llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp index fb2ef7b7ed2d71..bf0ec39ab6c6e7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUExpandPseudoIntrinsics.cpp @@ -51,8 +51,8 @@ AMDGPUExpandPseudoIntrinsicsPass::run(Module &M, ModuleAnalysisManager &) { if (ST.getCPU().empty() || ST.getCPU() == "generic") return PreservedAnalyses::all(); - if (auto WS = - Intrinsic::getDeclarationIfExists(&M, Intrinsic::amdgcn_wavefrontsize)) + if (auto WS = Intrinsic::getDeclarationIfExists( + &M, Intrinsic::amdgcn_wavefrontsize)) return expandWaveSizeIntrinsic(ST, WS); return PreservedAnalyses::all(); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits