https://github.com/mbrkusanin updated https://github.com/llvm/llvm-project/pull/122277
From 27d929a270ea1d8d3fa885f00794a092af12e50e Mon Sep 17 00:00:00 2001 From: Mirko Brkusanin <mirko.brkusa...@amd.com> Date: Mon, 23 Dec 2024 12:25:25 +0100 Subject: [PATCH 1/2] [AMDGPU] Remove s_wakeup_barrier instruction --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 1 - llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 6 ----- .../AMDGPU/AMDGPUInstructionSelector.cpp | 5 ---- llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp | 1 - .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 -- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 27 ++++--------------- llvm/lib/Target/AMDGPU/SOPInstructions.td | 12 --------- llvm/test/CodeGen/AMDGPU/s-barrier.ll | 9 ------- llvm/test/MC/AMDGPU/gfx12_asm_sop1.s | 9 ------- .../Disassembler/AMDGPU/gfx12_dasm_sop1.txt | 9 ------- 10 files changed, 5 insertions(+), 76 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 14c1746716cdd6..1b29a8e359c205 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -489,7 +489,6 @@ TARGET_BUILTIN(__builtin_amdgcn_s_barrier_wait, "vIs", "n", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_isfirst, "bIi", "n", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_s_barrier_init, "vv*i", "n", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_s_barrier_join, "vv*", "n", "gfx12-insts") -TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vv*", "n", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_s_barrier_leave, "vIs", "n", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_s_get_barrier_state, "Uii", "n", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_s_get_named_barrier_state, "Uiv*", "n", "gfx12-insts") diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 92418b9104ad14..b930d6983e2251 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -284,12 +284,6 @@ def int_amdgcn_s_barrier_join : ClangBuiltin<"__builtin_amdgcn_s_barrier_join">, Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; -// void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) %barrier) -// The %barrier argument must be uniform, otherwise behavior is undefined. -def int_amdgcn_s_wakeup_barrier : ClangBuiltin<"__builtin_amdgcn_s_wakeup_barrier">, - Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, - IntrNoCallback, IntrNoFree]>; - // void @llvm.amdgcn.s.barrier.wait(i16 %barrierType) def int_amdgcn_s_barrier_wait : ClangBuiltin<"__builtin_amdgcn_s_barrier_wait">, Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 041b9b4d66f63f..50e0faef9e7c27 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2239,7 +2239,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( case Intrinsic::amdgcn_s_barrier_signal_var: return selectNamedBarrierInit(I, IntrinsicID); case Intrinsic::amdgcn_s_barrier_join: - case Intrinsic::amdgcn_s_wakeup_barrier: case Intrinsic::amdgcn_s_get_named_barrier_state: return selectNamedBarrierInst(I, IntrinsicID); case Intrinsic::amdgcn_s_get_barrier_state: @@ -5839,8 +5838,6 @@ unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) { llvm_unreachable("not a named barrier op"); case Intrinsic::amdgcn_s_barrier_join: return AMDGPU::S_BARRIER_JOIN_IMM; - case Intrinsic::amdgcn_s_wakeup_barrier: - return AMDGPU::S_WAKEUP_BARRIER_IMM; case Intrinsic::amdgcn_s_get_named_barrier_state: return AMDGPU::S_GET_BARRIER_STATE_IMM; }; @@ -5850,8 +5847,6 @@ unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) { llvm_unreachable("not a named barrier op"); case Intrinsic::amdgcn_s_barrier_join: return AMDGPU::S_BARRIER_JOIN_M0; - case Intrinsic::amdgcn_s_wakeup_barrier: - return AMDGPU::S_WAKEUP_BARRIER_M0; case Intrinsic::amdgcn_s_get_named_barrier_state: return AMDGPU::S_GET_BARRIER_STATE_M0; }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp index 2df068d8fb007b..0406ba9c68ccd3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp @@ -326,7 +326,6 @@ bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) { case Intrinsic::amdgcn_s_barrier_wait: case Intrinsic::amdgcn_s_barrier_leave: case Intrinsic::amdgcn_s_get_barrier_state: - case Intrinsic::amdgcn_s_wakeup_barrier: case Intrinsic::amdgcn_wave_barrier: case Intrinsic::amdgcn_sched_barrier: case Intrinsic::amdgcn_sched_group_barrier: diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 08e23cbf34e42b..224c368cff4a1f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3304,7 +3304,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl( constrainOpWithReadfirstlane(B, MI, 1); return; case Intrinsic::amdgcn_s_barrier_join: - case Intrinsic::amdgcn_s_wakeup_barrier: constrainOpWithReadfirstlane(B, MI, 1); return; case Intrinsic::amdgcn_s_barrier_init: @@ -5272,7 +5271,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); break; case Intrinsic::amdgcn_s_barrier_join: - case Intrinsic::amdgcn_s_wakeup_barrier: OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); break; case Intrinsic::amdgcn_s_barrier_init: diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 0ac84f4e1f02af..7e07c2234a132e 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -10076,8 +10076,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, auto *NewMI = DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops); return SDValue(NewMI, 0); } - case Intrinsic::amdgcn_s_barrier_join: - case Intrinsic::amdgcn_s_wakeup_barrier: { + case Intrinsic::amdgcn_s_barrier_join: { // these three intrinsics have one operand: barrier pointer SDValue Chain = Op->getOperand(0); SmallVector<SDValue, 2> Ops; @@ -10086,32 +10085,16 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, if (isa<ConstantSDNode>(BarOp)) { uint64_t BarVal = cast<ConstantSDNode>(BarOp)->getZExtValue(); - switch (IntrinsicID) { - default: - return SDValue(); - case Intrinsic::amdgcn_s_barrier_join: - Opc = AMDGPU::S_BARRIER_JOIN_IMM; - break; - case Intrinsic::amdgcn_s_wakeup_barrier: - Opc = AMDGPU::S_WAKEUP_BARRIER_IMM; - break; - } + Opc = AMDGPU::S_BARRIER_JOIN_IMM; + // extract the BarrierID from bits 4-9 of the immediate unsigned BarID = (BarVal >> 4) & 0x3F; SDValue K = DAG.getTargetConstant(BarID, DL, MVT::i32); Ops.push_back(K); Ops.push_back(Chain); } else { - switch (IntrinsicID) { - default: - return SDValue(); - case Intrinsic::amdgcn_s_barrier_join: - Opc = AMDGPU::S_BARRIER_JOIN_M0; - break; - case Intrinsic::amdgcn_s_wakeup_barrier: - Opc = AMDGPU::S_WAKEUP_BARRIER_M0; - break; - } + Opc = AMDGPU::S_BARRIER_JOIN_M0; + // extract the BarrierID from bits 4-9 of BarOp, copy to M0[5:0] SDValue M0Val; M0Val = DAG.getNode(ISD::SRL, DL, MVT::i32, BarOp, diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 46ac2a4992c456..da186f7058d18a 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -488,11 +488,6 @@ def S_BARRIER_JOIN_M0 : SOP1_Pseudo <"s_barrier_join m0", (outs), (ins), let isConvergent = 1; } -def S_WAKEUP_BARRIER_M0 : SOP1_Pseudo <"s_wakeup_barrier m0", (outs), (ins), - "", []>{ - let SchedRW = [WriteBarrier]; - let isConvergent = 1; -} } // End Uses = [M0] def S_BARRIER_SIGNAL_IMM : SOP1_Pseudo <"s_barrier_signal", (outs), @@ -514,11 +509,6 @@ def S_BARRIER_JOIN_IMM : SOP1_Pseudo <"s_barrier_join", (outs), let isConvergent = 1; } -def S_WAKEUP_BARRIER_IMM : SOP1_Pseudo <"s_wakeup_barrier", (outs), - (ins SplitBarrier:$src0), "$src0", []>{ - let SchedRW = [WriteBarrier]; - let isConvergent = 1; -} } // End has_sdst = 0 def S_GET_BARRIER_STATE_IMM : SOP1_Pseudo <"s_get_barrier_state", (outs SSrc_b32:$sdst), @@ -2092,13 +2082,11 @@ defm S_BARRIER_SIGNAL_ISFIRST_M0 : SOP1_M0_Real_gfx12<0x04f>; defm S_GET_BARRIER_STATE_M0 : SOP1_M0_Real_gfx12<0x050>; defm S_BARRIER_INIT_M0 : SOP1_M0_Real_gfx12<0x051>; defm S_BARRIER_JOIN_M0 : SOP1_M0_Real_gfx12<0x052>; -defm S_WAKEUP_BARRIER_M0 : SOP1_M0_Real_gfx12<0x057>; defm S_BARRIER_SIGNAL_IMM : SOP1_IMM_Real_gfx12<0x04e>; defm S_BARRIER_SIGNAL_ISFIRST_IMM : SOP1_IMM_Real_gfx12<0x04f>; defm S_GET_BARRIER_STATE_IMM : SOP1_IMM_Real_gfx12<0x050>; defm S_BARRIER_INIT_IMM : SOP1_IMM_Real_gfx12<0x051>; defm S_BARRIER_JOIN_IMM : SOP1_IMM_Real_gfx12<0x052>; -defm S_WAKEUP_BARRIER_IMM : SOP1_IMM_Real_gfx12<0x057>; defm S_SLEEP_VAR : SOP1_IMM_Real_gfx12<0x058>; //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AMDGPU/s-barrier.ll b/llvm/test/CodeGen/AMDGPU/s-barrier.ll index a2624e5f61307a..83a077f7f74db1 100644 --- a/llvm/test/CodeGen/AMDGPU/s-barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/s-barrier.ll @@ -112,10 +112,6 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in) ; GFX12-SDAG-NEXT: s_mov_b32 m0, 2 ; GFX12-SDAG-NEXT: s_barrier_wait 1 ; GFX12-SDAG-NEXT: s_barrier_leave -; GFX12-SDAG-NEXT: s_wakeup_barrier m0 -; GFX12-SDAG-NEXT: s_mov_b32 m0, s2 -; GFX12-SDAG-NEXT: s_wakeup_barrier m0 -; GFX12-SDAG-NEXT: s_mov_b32 m0, 2 ; GFX12-SDAG-NEXT: s_get_barrier_state s3, m0 ; GFX12-SDAG-NEXT: s_mov_b32 m0, s2 ; GFX12-SDAG-NEXT: s_get_barrier_state s2, m0 @@ -176,8 +172,6 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in) ; GFX12-GISEL-NEXT: s_barrier_join m0 ; GFX12-GISEL-NEXT: s_barrier_wait 1 ; GFX12-GISEL-NEXT: s_barrier_leave -; GFX12-GISEL-NEXT: s_wakeup_barrier 2 -; GFX12-GISEL-NEXT: s_wakeup_barrier m0 ; GFX12-GISEL-NEXT: s_get_barrier_state s0, 2 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12-GISEL-NEXT: s_get_barrier_state s0, m0 @@ -218,8 +212,6 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in) call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) %in) call void @llvm.amdgcn.s.barrier.wait(i16 1) call void @llvm.amdgcn.s.barrier.leave(i16 1) - call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) @bar) - call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) %in) %state = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) @bar) %state2 = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) %in) call void @llvm.amdgcn.s.barrier() @@ -295,7 +287,6 @@ declare i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32) #1 declare void @llvm.amdgcn.s.barrier.init(ptr addrspace(3), i32) #1 declare void @llvm.amdgcn.s.barrier.join(ptr addrspace(3)) #1 declare void @llvm.amdgcn.s.barrier.leave(i16) #1 -declare void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3)) #1 declare i32 @llvm.amdgcn.s.get.barrier.state(i32) #1 declare i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3)) #1 diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s index 939320e9ef2dce..d93ea2e82c1d28 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s @@ -726,15 +726,6 @@ s_barrier_join -2 s_barrier_join m0 // GFX12: encoding: [0x7d,0x52,0x80,0xbe] -s_wakeup_barrier 1 -// GFX12: encoding: [0x81,0x57,0x80,0xbe] - -s_wakeup_barrier -1 -// GFX12: encoding: [0xc1,0x57,0x80,0xbe] - -s_wakeup_barrier m0 -// GFX12: encoding: [0x7d,0x57,0x80,0xbe] - s_get_barrier_state s3, -1 // GFX12: encoding: [0xc1,0x50,0x83,0xbe] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt index f8c235f77b5f56..2cb6da42213e32 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt @@ -726,15 +726,6 @@ # GFX12: s_barrier_join m0 ; encoding: [0x7d,0x52,0x80,0xbe] 0x7d,0x52,0x80,0xbe -# GFX12: s_wakeup_barrier 1 ; encoding: [0x81,0x57,0x80,0xbe] -0x81,0x57,0x80,0xbe - -# GFX12: s_wakeup_barrier -1 ; encoding: [0xc1,0x57,0x80,0xbe] -0xc1,0x57,0x80,0xbe - -# GFX12: s_wakeup_barrier m0 ; encoding: [0x7d,0x57,0x80,0xbe] -0x7d,0x57,0x80,0xbe - # GFX12: s_get_barrier_state s3, -1 ; encoding: [0xc1,0x50,0x83,0xbe] 0xc1,0x50,0x83,0xbe From ffee5cec4e03b6eda7221c8699b98618ac26a105 Mon Sep 17 00:00:00 2001 From: Mirko Brkusanin <mirko.brkusa...@amd.com> Date: Thu, 9 Jan 2025 17:08:04 +0100 Subject: [PATCH 2/2] update clang opencl test --- clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl index b1866a8e492c84..5b5ae419f0a4a9 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl @@ -173,21 +173,6 @@ void test_s_barrier_join(void *bar) __builtin_amdgcn_s_barrier_join(bar); } -// CHECK-LABEL: @test_s_wakeup_barrier( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr -// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3) -// CHECK-NEXT: call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) [[TMP1]]) -// CHECK-NEXT: ret void -// -void test_s_wakeup_barrier(void *bar) -{ - __builtin_amdgcn_s_wakeup_barrier(bar); -} - // CHECK-LABEL: @test_s_barrier_leave( // CHECK-NEXT: entry: // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.leave(i16 1) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits